[PATCH 6/7] util: Check for pkttyagent availability properly

2021-11-20 Thread Martin Kletzander
It does not need a tty to work; it opens its controlling terminal for user
interaction, and with this patch even crazy things like this work:

  echo 'list --name' | virsh -q >/dev/null

Signed-off-by: Martin Kletzander 
---
 src/util/virpolkit.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/util/virpolkit.c b/src/util/virpolkit.c
index 63bb8133a8aa..7156adc10c0a 100644
--- a/src/util/virpolkit.c
+++ b/src/util/virpolkit.c
@@ -180,9 +180,9 @@ virPolkitAgentCreate(void)
 int outfd = STDOUT_FILENO;
 int errfd = STDERR_FILENO;
 
-if (!isatty(STDIN_FILENO)) {
+if (!virPolkitAgentAvailable()) {
 virReportError(VIR_ERR_SYSTEM_ERROR, "%s",
-   _("Cannot start polkit text agent without a tty"));
+   _("polkit text authentication agent unavailable"));
 goto error;
 }
 
-- 
2.34.0



[PATCH 0/7] Polkit tty agent fixes

2021-11-20 Thread Martin Kletzander
Apart from fixing bz 1945501 [0] there are some small changes/fixes to some of
the polkit code.

[0] https://bugzilla.redhat.com/show_bug.cgi?id=1945501

Martin Kletzander (7):
  virsh: Remove needless variable
  util: Tiny reword fix in comment
  util: Add virPolkitAgentAvailable
  virsh: Do not try connecting first time without polkit agent
  util: Report errors in all code paths in virPolkitAgentCreate
  util: Check for pkttyagent availability properly
  util: Make client-side polkit work even with polkit disabled

 src/libvirt_private.syms |   1 +
 src/util/virpolkit.c | 202 ++-
 src/util/virpolkit.h |   1 +
 tools/virsh.c|  14 ++-
 4 files changed, 127 insertions(+), 91 deletions(-)

-- 
2.34.0



[PATCH 1/7] virsh: Remove needless variable

2021-11-20 Thread Martin Kletzander
It only redundantly reflects whether pkagent != NULL.

Signed-off-by: Martin Kletzander 
---
 tools/virsh.c | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/tools/virsh.c b/tools/virsh.c
index b9f3f851d3ec..da35c5c2b9c1 100644
--- a/tools/virsh.c
+++ b/tools/virsh.c
@@ -119,7 +119,6 @@ virshConnect(vshControl *ctl, const char *uri, bool 
readonly)
 bool keepalive_forced = false;
 virPolkitAgent *pkagent = NULL;
 int authfail = 0;
-bool agentCreated = false;
 
 if (ctl->keepalive_interval >= 0) {
 interval = ctl->keepalive_interval;
@@ -141,12 +140,11 @@ virshConnect(vshControl *ctl, const char *uri, bool 
readonly)
 goto cleanup;
 
 err = virGetLastError();
-if (!agentCreated &&
+if (!pkagent &&
 err && err->domain == VIR_FROM_POLKIT &&
 err->code == VIR_ERR_AUTH_UNAVAILABLE) {
-if (!pkagent && !(pkagent = virPolkitAgentCreate()))
+if (!(pkagent = virPolkitAgentCreate()))
 goto cleanup;
-agentCreated = true;
 } else if (err && err->domain == VIR_FROM_POLKIT &&
err->code == VIR_ERR_AUTH_FAILED) {
 authfail++;
-- 
2.34.0



[PATCH 5/7] util: Report errors in all code paths in virPolkitAgentCreate

2021-11-20 Thread Martin Kletzander
Signed-off-by: Martin Kletzander 
---
 src/util/virpolkit.c | 10 --
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/src/util/virpolkit.c b/src/util/virpolkit.c
index 3b333547d70b..63bb8133a8aa 100644
--- a/src/util/virpolkit.c
+++ b/src/util/virpolkit.c
@@ -180,8 +180,11 @@ virPolkitAgentCreate(void)
 int outfd = STDOUT_FILENO;
 int errfd = STDERR_FILENO;
 
-if (!isatty(STDIN_FILENO))
+if (!isatty(STDIN_FILENO)) {
+virReportError(VIR_ERR_SYSTEM_ERROR, "%s",
+   _("Cannot start polkit text agent without a tty"));
 goto error;
+}
 
 if (virPipe(pipe_fd) < 0)
 goto error;
@@ -205,8 +208,11 @@ virPolkitAgentCreate(void)
 pollfd.fd = pipe_fd[0];
 pollfd.events = POLLHUP;
 
-if (poll(, 1, -1) < 0)
+if (poll(, 1, -1) < 0) {
+virReportSystemError(errno, "%s",
+ _("error in poll call"));
 goto error;
+}
 
 return agent;
 
-- 
2.34.0



[PATCH 2/7] util: Tiny reword fix in comment

2021-11-20 Thread Martin Kletzander
Automatic "Ptr " -> " *" also wreaked havoc in comments.  Fix it and while at it
reword the sentence so it is clear that the object is newly allocated.

Signed-off-by: Martin Kletzander 
---
 src/util/virpolkit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/util/virpolkit.c b/src/util/virpolkit.c
index e90b3b871d15..86255a96760f 100644
--- a/src/util/virpolkit.c
+++ b/src/util/virpolkit.c
@@ -168,7 +168,7 @@ virPolkitAgentDestroy(virPolkitAgent *agent)
  *
  * Allocate and setup a polkit agent
  *
- * Returns a virCommand *on success and NULL on failure
+ * Returns newly allocated virPolkitAgent * on success and NULL on failure
  */
 virPolkitAgent *
 virPolkitAgentCreate(void)
-- 
2.34.0



[PATCH 4/7] virsh: Do not try connecting first time without polkit agent

2021-11-20 Thread Martin Kletzander
Trying to connect once without a polkit agent will generate an error on the
server side which seems too rough given it only serves the purpose of the client
(virsh in this case) to figure out that an agent is needed.  Thankfully we can
just try running the agent.  It does not break anything as we are running it
with `--fallback`, which makes sure it does not replace an existing agent in
case there is one already registered.

The second piece of code trying to start the polkit text agent is kept in order
to _really_ try out starting the agent (and error out when failing to do so)
just in case the agent was not available the first time it was run.  Even though
it should not happen it avoids a very rare race condition and really does not
add much complexity.

Fixes: https://bugzilla.redhat.com/show_bug.cgi?id=1945501

Signed-off-by: Martin Kletzander 
---
 tools/virsh.c | 8 
 1 file changed, 8 insertions(+)

diff --git a/tools/virsh.c b/tools/virsh.c
index da35c5c2b9c1..5234a3decb22 100644
--- a/tools/virsh.c
+++ b/tools/virsh.c
@@ -129,6 +129,10 @@ virshConnect(vshControl *ctl, const char *uri, bool 
readonly)
 keepalive_forced = true;
 }
 
+if (virPolkitAgentAvailable() &&
+!(pkagent = virPolkitAgentCreate()))
+virResetLastError();
+
 do {
 virErrorPtr err;
 
@@ -140,6 +144,10 @@ virshConnect(vshControl *ctl, const char *uri, bool 
readonly)
 goto cleanup;
 
 err = virGetLastError();
+/*
+ * If polkit agent failed starting the first time, then retry once more
+ * now when we know it really is needed.
+ */
 if (!pkagent &&
 err && err->domain == VIR_FROM_POLKIT &&
 err->code == VIR_ERR_AUTH_UNAVAILABLE) {
-- 
2.34.0



[PATCH 3/7] util: Add virPolkitAgentAvailable

2021-11-20 Thread Martin Kletzander
With this function we can decide whether to try running the polkit text agent
only if it is available, removing a potential needless error saying that the
agent binary does not exist, which is useful especially when running the agent
before knowing whether it is going to be needed.

Signed-off-by: Martin Kletzander 
---
 src/libvirt_private.syms |  1 +
 src/util/virpolkit.c | 44 
 src/util/virpolkit.h |  1 +
 3 files changed, 46 insertions(+)

diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index a7bc50a4d16d..c11be4eafa19 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -3078,6 +3078,7 @@ virPidFileWritePath;
 
 
 # util/virpolkit.h
+virPolkitAgentAvailable;
 virPolkitAgentCreate;
 virPolkitAgentDestroy;
 virPolkitCheckAuth;
diff --git a/src/util/virpolkit.c b/src/util/virpolkit.c
index 86255a96760f..3b333547d70b 100644
--- a/src/util/virpolkit.c
+++ b/src/util/virpolkit.c
@@ -20,6 +20,7 @@
  */
 
 #include 
+#include 
 #include 
 
 #include "virpolkit.h"
@@ -217,6 +218,42 @@ virPolkitAgentCreate(void)
 }
 
 
+/*
+ * virPolkitAgentAvailable
+ *
+ * This function does some preliminary checking that the pkttyagent does not
+ * fail starting so that it can be started without waiting for first failed
+ * connection with VIR_ERR_AUTH_UNAVAILABLE.
+ */
+bool
+virPolkitAgentAvailable(void)
+{
+const char *termid = ctermid(NULL);
+VIR_AUTOCLOSE fd = -1;
+
+if (!virFileExists(PKTTYAGENT))
+return false;
+
+if (!termid)
+return false;
+
+/*
+ *The pkttyagent needs to open the controlling terminal.
+ *
+ * Just in case we are running without a ctty make sure this open() does 
not
+ * change that.
+ *
+ * We could check if our session has a controlling terminal available
+ * instead, but it would require parsing `/proc/self/stat` on Linux, which
+ * is not portable and moreover requires way more work than just open().
+ */
+fd = open(termid, O_RDWR | O_NOCTTY);
+if (fd < 0)
+return false;
+
+return true;
+}
+
 #else /* ! WITH_POLKIT */
 
 int virPolkitCheckAuth(const char *actionid G_GNUC_UNUSED,
@@ -247,4 +284,11 @@ virPolkitAgentCreate(void)
_("polkit text authentication agent unavailable"));
 return NULL;
 }
+
+bool
+virPolkitAgentAvailable(void)
+{
+return false;
+}
+
 #endif /* WITH_POLKIT */
diff --git a/src/util/virpolkit.h b/src/util/virpolkit.h
index a577d59452ba..7bcd040e5e06 100644
--- a/src/util/virpolkit.h
+++ b/src/util/virpolkit.h
@@ -37,3 +37,4 @@ typedef struct _virPolkitAgent virPolkitAgent;
 
 void virPolkitAgentDestroy(virPolkitAgent *cmd);
 virPolkitAgent *virPolkitAgentCreate(void);
+bool virPolkitAgentAvailable(void);
-- 
2.34.0



[PATCH 7/7] util: Make client-side polkit work even with polkit disabled

2021-11-20 Thread Martin Kletzander
The reason for this is twofold:

- the polkit build option is documented for UNIX socket access checks

- there is no server-side change or dbus call done when enabling this as it only
  starts a polkit agent on the client-side (actually only in virsh) and does not
  need any requirements (starting is skipped if pkttyagent is not installed)

Also move the conditional implementation to the bottom of the file so that it
does not look like the whole file is built conditionally and the common
functions are at the top.

Signed-off-by: Martin Kletzander 
---
 src/util/virpolkit.c | 240 ---
 1 file changed, 109 insertions(+), 131 deletions(-)

diff --git a/src/util/virpolkit.c b/src/util/virpolkit.c
index 7156adc10c0a..b51104100796 100644
--- a/src/util/virpolkit.c
+++ b/src/util/virpolkit.c
@@ -21,6 +21,7 @@
 
 #include 
 #include 
+#include 
 #include 
 
 #include "virpolkit.h"
@@ -37,119 +38,10 @@
 
 VIR_LOG_INIT("util.polkit");
 
-#if WITH_POLKIT
-# include 
-
 struct _virPolkitAgent {
 virCommand *cmd;
 };
 
-/*
- * virPolkitCheckAuth:
- * @actionid: permission to check
- * @pid: client process ID
- * @startTime: process start time, or 0
- * @uid: client process user ID
- * @details: NULL terminated (key, value) pair list
- * @allowInteraction: true if auth prompts are allowed
- *
- * Check if a client is authenticated with polkit
- *
- * Returns 0 on success, -1 on failure, -2 on auth denied
- */
-int virPolkitCheckAuth(const char *actionid,
-   pid_t pid,
-   unsigned long long startTime,
-   uid_t uid,
-   const char **details,
-   bool allowInteraction)
-{
-GDBusConnection *sysbus;
-GVariantBuilder builder;
-GVariant *gprocess = NULL;
-GVariant *gdetails = NULL;
-g_autoptr(GVariant) message = NULL;
-g_autoptr(GVariant) reply = NULL;
-g_autoptr(GVariantIter) iter = NULL;
-char *retkey;
-char *retval;
-gboolean is_authorized;
-gboolean is_challenge;
-bool is_dismissed = false;
-const char **next;
-
-if (!(sysbus = virGDBusGetSystemBus()))
-return -1;
-
-VIR_INFO("Checking PID %lld running as %d",
- (long long) pid, uid);
-
-g_variant_builder_init(, G_VARIANT_TYPE("a{sv}"));
-g_variant_builder_add(, "{sv}", "pid", g_variant_new_uint32(pid));
-g_variant_builder_add(, "{sv}", "start-time", 
g_variant_new_uint64(startTime));
-g_variant_builder_add(, "{sv}", "uid", g_variant_new_int32(uid));
-gprocess = g_variant_builder_end();
-
-g_variant_builder_init(, G_VARIANT_TYPE("a{ss}"));
-
-if (details) {
-for (next = details; *next; next++) {
-const char *detail1 = *(next++);
-const char *detail2 = *next;
-g_variant_builder_add(, "{ss}", detail1, detail2);
-}
-}
-
-gdetails = g_variant_builder_end();
-
-message = g_variant_new("((s@a{sv})s@a{ss}us)",
-"unix-process",
-gprocess,
-actionid,
-gdetails,
-allowInteraction,
-"" /* cancellation ID */);
-
-if (virGDBusCallMethod(sysbus,
-   ,
-   G_VARIANT_TYPE("((bba{ss}))"),
-   NULL,
-   "org.freedesktop.PolicyKit1",
-   "/org/freedesktop/PolicyKit1/Authority",
-   "org.freedesktop.PolicyKit1.Authority",
-   "CheckAuthorization",
-   message) < 0)
-return -1;
-
-g_variant_get(reply, "((bba{ss}))", _authorized, _challenge, );
-
-while (g_variant_iter_loop(iter, "{ss}", , )) {
-if (STREQ(retkey, "polkit.dismissed") && STREQ(retval, "true"))
-is_dismissed = true;
-}
-
-VIR_DEBUG("is auth %d  is challenge %d",
-  is_authorized, is_challenge);
-
-if (is_authorized)
-return 0;
-
-if (is_dismissed) {
-virReportError(VIR_ERR_AUTH_CANCELLED, "%s",
-   _("user cancelled authentication process"));
-} else if (is_challenge) {
-virReportError(VIR_ERR_AUTH_UNAVAILABLE,
-   _("no polkit agent available to authenticate action 
'%s'"),
-   actionid);
-} else {
-virReportError(VIR_ERR_AUTH_FAILED, "%s",
-   _("access denied by policy"));
-}
-
-return -2;
-}
-
-
 /* virPolkitAgentDestroy:
  * @cmd: Pointer to the virCommand * created during virPolkitAgentCreate
  *
@@ -260,6 +152,114 @@ virPolkitAgentAvailable(void)
 return true;
 }
 
+
+#if WITH_POLKIT
+
+/*
+ * virPolkitCheckAuth:
+ * @actionid: permission to check
+ * @pid: client process ID
+ * @startTime: process start time, or 0
+ * @uid: client process user ID
+ * @details: 

[PATCH v2 1/2] util: Add virProcessGetStat

2021-11-20 Thread Martin Kletzander
This reads and separates all fields from /proc/<pid>/stat or
/proc/<pid>/task/<tid>/stat, as it is easy to make mistakes in the
implementation.  Some tests are added to show it works correctly.  No number
parsing is done as it would be unused for most of the fields most, if not all,
of the time.  No struct is used for the result as the length can vary (new
fields can be added in the future).

Signed-off-by: Martin Kletzander 
---
 src/libvirt_linux.syms|  3 +
 src/util/virprocess.c | 78 +
 src/util/virprocess.h |  4 ++
 tests/meson.build |  1 +
 tests/virprocessstatdata/complex/stat |  2 +
 tests/virprocessstatdata/simple/stat  |  1 +
 tests/virprocessstattest.c| 84 +++
 7 files changed, 173 insertions(+)
 create mode 100644 tests/virprocessstatdata/complex/stat
 create mode 100644 tests/virprocessstatdata/simple/stat
 create mode 100644 tests/virprocessstattest.c

diff --git a/src/libvirt_linux.syms b/src/libvirt_linux.syms
index 55649ae39cec..14422fae7286 100644
--- a/src/libvirt_linux.syms
+++ b/src/libvirt_linux.syms
@@ -10,6 +10,9 @@ virHostCPUGetSiblingsList;
 virHostCPUGetSocket;
 virHostCPUGetStatsLinux;
 
+# util/virprocess.h
+virProcessGetStat;
+
 # Let emacs know we want case-insensitive sorting
 # Local Variables:
 # sort-fold-case: t
diff --git a/src/util/virprocess.c b/src/util/virprocess.c
index 6de3f36f529c..73ebcaae422f 100644
--- a/src/util/virprocess.c
+++ b/src/util/virprocess.c
@@ -1721,3 +1721,81 @@ virProcessSetScheduler(pid_t pid G_GNUC_UNUSED,
 }
 
 #endif /* !WITH_SCHED_SETSCHEDULER */
+
+#ifdef __linux__
+/*
+ * Get all stat fields for a process based on pid and tid:
+ * - pid == 0 && tid == 0 => /proc/self/stat
+ * - pid != 0 && tid == 0 => /proc/<pid>/stat
+ * - pid == 0 && tid != 0 => /proc/self/task/<tid>/stat
+ * - pid != 0 && tid != 0 => /proc/<pid>/task/<tid>/stat
+ * and return them as array of strings.
+ */
+GStrv
+virProcessGetStat(pid_t pid,
+  pid_t tid)
+{
+size_t buflen = 0;
+g_autofree char *buf = NULL;
+g_autofree char *path = NULL;
+GStrv rest = NULL;
+GStrv ret = NULL;
+char *comm = NULL;
+char *rparen = NULL;
+size_t nrest = 0;
+
+if (pid) {
+if (tid)
+path = g_strdup_printf("/proc/%d/task/%d/stat", (int)pid, 
(int)tid);
+else
+path = g_strdup_printf("/proc/%d/stat", (int)pid);
+} else {
+if (tid)
+path = g_strdup_printf("/proc/self/task/%d/stat", (int)tid);
+else
+path = g_strdup("/proc/self/stat");
+}
+
+if (virFileReadAllQuiet(path, 1024, ) < 0)
+return NULL;
+
+/* eliminate trailing spaces */
+while (g_ascii_isspace(buf[--buflen]))
+   buf[buflen] = '\0';
+
+/* Find end of the first field */
+if (!(comm = strchr(buf, ' ')))
+return NULL;
+*comm = '\0';
+
+/* Check start of the second field (filename of the executable, in
+ * parentheses) */
+comm++;
+if (*comm != '(')
+return NULL;
+comm++;
+
+/* Check end of the second field (last closing parenthesis) */
+rparen = strrchr(comm, ')');
+if (!rparen)
+return NULL;
+*rparen = '\0';
+
+/* We need to check that the next char is not '\0', but why not just opt in
+ * for the safer way of checking whether it is ' ' (space) instead */
+if (rparen[1] != ' ')
+return NULL;
+
+rest = g_strsplit(rparen + 2, " ", 0);
+nrest = g_strv_length(rest);
+ret = g_new0(char *, nrest + 3);
+ret[0] = g_strdup(buf);
+ret[1] = g_strdup(comm);
+memcpy(ret + 2, rest, nrest * sizeof(char *));
+
+/* Do not use g_strfreev() as individual elements they were moved to @ret. 
*/
+VIR_FREE(rest);
+
+return ret;
+}
+#endif
diff --git a/src/util/virprocess.h b/src/util/virprocess.h
index 9910331a0caa..74dad1f3b15e 100644
--- a/src/util/virprocess.h
+++ b/src/util/virprocess.h
@@ -117,6 +117,10 @@ int virProcessSetupPrivateMountNS(void);
 int virProcessSetScheduler(pid_t pid,
virProcessSchedPolicy policy,
int priority);
+#ifdef __linux__
+GStrv virProcessGetStat(pid_t pid, pid_t tid);
+#endif
+
 typedef enum {
 VIR_PROCESS_NAMESPACE_MNT = (1 << 1),
 VIR_PROCESS_NAMESPACE_IPC = (1 << 2),
diff --git a/tests/meson.build b/tests/meson.build
index 1948c07ae385..f75c24872086 100644
--- a/tests/meson.build
+++ b/tests/meson.build
@@ -347,6 +347,7 @@ if host_machine.system() == 'linux'
 { 'name': 'scsihosttest' },
 { 'name': 'vircaps2xmltest', 'link_whole': [ test_file_wrapper_lib ] },
 { 'name': 'virnetdevbandwidthtest' },
+{ 'name': 'virprocessstattest', 'link_whole': [ test_file_wrapper_lib ] },
 { 'name': 'virresctrltest', 'link_whole': [ test_file_wrapper_lib ] },
 { 'name': 'virscsitest' },
 { 'name': 'virusbtest' },
diff --git a/tests/virprocessstatdata/complex/stat 

[PATCH v2 2/2] Use virProcessGetStat

2021-11-20 Thread Martin Kletzander
This eliminates one incorrect parsing implementation.

Signed-off-by: Martin Kletzander 
---
 src/qemu/qemu_driver.c | 33 ++---
 src/util/virprocess.c  | 48 ++
 2 files changed, 12 insertions(+), 69 deletions(-)

diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index d954635dde2a..0468d6aaf314 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -1399,36 +1399,17 @@ qemuGetSchedInfo(unsigned long long *cpuWait,
 
 static int
 qemuGetProcessInfo(unsigned long long *cpuTime, int *lastCpu, long *vm_rss,
-   pid_t pid, int tid)
+   pid_t pid, pid_t tid)
 {
-g_autofree char *proc = NULL;
-FILE *pidinfo;
+g_auto(GStrv) proc_stat = virProcessGetStat(pid, tid);
 unsigned long long usertime = 0, systime = 0;
 long rss = 0;
 int cpu = 0;
 
-/* In general, we cannot assume pid_t fits in int; but /proc parsing
- * is specific to Linux where int works fine.  */
-if (tid)
-proc = g_strdup_printf("/proc/%d/task/%d/stat", (int)pid, tid);
-else
-proc = g_strdup_printf("/proc/%d/stat", (int)pid);
-if (!proc)
-return -1;
-
-pidinfo = fopen(proc, "r");
-
-/* See 'man proc' for information about what all these fields are. We're
- * only interested in a very few of them */
-if (!pidinfo ||
-fscanf(pidinfo,
-   /* pid -> stime */
-   "%*d (%*[^)]) %*c %*d %*d %*d %*d %*d %*u %*u %*u %*u %*u %llu 
%llu"
-   /* cutime -> endcode */
-   "%*d %*d %*d %*d %*d %*d %*u %*u %ld %*u %*u %*u"
-   /* startstack -> processor */
-   "%*u %*u %*u %*u %*u %*u %*u %*u %*u %*u %*d %d",
-   , , , ) != 4) {
+if (virStrToLong_ullp(proc_stat[13], NULL, 10, ) < 0 ||
+virStrToLong_ullp(proc_stat[14], NULL, 10, ) < 0 ||
+virStrToLong_l(proc_stat[23], NULL, 10, ) < 0 ||
+virStrToLong_i(proc_stat[38], NULL, 10, ) < 0) {
 VIR_WARN("cannot parse process status data");
 }
 
@@ -1450,8 +1431,6 @@ qemuGetProcessInfo(unsigned long long *cpuTime, int 
*lastCpu, long *vm_rss,
 VIR_DEBUG("Got status for %d/%d user=%llu sys=%llu cpu=%d rss=%ld",
   (int)pid, tid, usertime, systime, cpu, rss);
 
-VIR_FORCE_FCLOSE(pidinfo);
-
 return 0;
 }
 
diff --git a/src/util/virprocess.c b/src/util/virprocess.c
index 73ebcaae422f..4def5ecf5eb3 100644
--- a/src/util/virprocess.c
+++ b/src/util/virprocess.c
@@ -1153,56 +1153,20 @@ virProcessSetMaxCoreSize(pid_t pid G_GNUC_UNUSED,
 int virProcessGetStartTime(pid_t pid,
unsigned long long *timestamp)
 {
-char *tmp;
-int len;
-g_autofree char *filename = NULL;
-g_autofree char *buf = NULL;
-g_auto(GStrv) tokens = NULL;
-
-filename = g_strdup_printf("/proc/%llu/stat", (long long)pid);
-
-if ((len = virFileReadAll(filename, 1024, )) < 0)
-return -1;
+g_auto(GStrv) proc_stat = virProcessGetStat(pid, 0);
 
-/* start time is the token at index 19 after the '(process name)' entry - 
since only this
- * field can contain the ')' character, search backwards for this to avoid 
malicious
- * processes trying to fool us
- */
-
-if (!(tmp = strrchr(buf, ')'))) {
+if (!proc_stat || g_strv_length(proc_stat) < 22) {
 virReportError(VIR_ERR_INTERNAL_ERROR,
-   _("Cannot find start time in %s"),
-   filename);
+   _("Cannot find start time for pid %d"), (int)pid);
 return -1;
 }
-tmp += 2; /* skip ') ' */
-if ((tmp - buf) >= len) {
-virReportError(VIR_ERR_INTERNAL_ERROR,
-   _("Cannot find start time in %s"),
-   filename);
-return -1;
-}
-
-tokens = g_strsplit(tmp, " ", 0);
 
-if (!tokens ||
-g_strv_length(tokens) < 20) {
+if (virStrToLong_ull(proc_stat[21], NULL, 10, timestamp) < 0) {
 virReportError(VIR_ERR_INTERNAL_ERROR,
-   _("Cannot find start time in %s"),
-   filename);
+   _("Cannot parse start time %s for pid %d"),
+   proc_stat[21], (int)pid);
 return -1;
 }
-
-if (virStrToLong_ull(tokens[19],
- NULL,
- 10,
- timestamp) < 0) {
-virReportError(VIR_ERR_INTERNAL_ERROR,
-   _("Cannot parse start time %s in %s"),
-   tokens[19], filename);
-return -1;
-}
-
 return 0;
 }
 #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
-- 
2.34.0



[PATCH v2 0/2] Fix /proc/*/stat parsing

2021-11-20 Thread Martin Kletzander
While working on some polkit stuff I found out that we are inconsistent with the
way we parse /proc/*/stat files, so I added a new helper instead along with some
tests.  Unfortunately using it for the thing I wanted is not really viable in
the end, so it "violates" the Rule of three, but at least it does something
correctly.

v2:
- Fixed open64 by just using virFileReadAllQuiet instead of g_file_get_contents
- Removed some leftover unused variables
- Still do not know why my cirrus builds fail

v1:
- https://listman.redhat.com/archives/libvir-list/2021-November/msg00580.html

Martin Kletzander (2):
  util: Add virProcessGetStat
  Use virProcessGetStat

 src/libvirt_linux.syms|   3 +
 src/qemu/qemu_driver.c|  33 ++-
 src/util/virprocess.c | 126 +-
 src/util/virprocess.h |   4 +
 tests/meson.build |   1 +
 tests/virprocessstatdata/complex/stat |   2 +
 tests/virprocessstatdata/simple/stat  |   1 +
 tests/virprocessstattest.c|  84 +
 8 files changed, 185 insertions(+), 69 deletions(-)
 create mode 100644 tests/virprocessstatdata/complex/stat
 create mode 100644 tests/virprocessstatdata/simple/stat
 create mode 100644 tests/virprocessstattest.c

-- 
2.34.0



Re: [PATCH 0/2] Fix /proc/*/stat parsing

2021-11-20 Thread Martin Kletzander

On Fri, Nov 19, 2021 at 06:31:34PM +0100, Martin Kletzander wrote:

While working on some polkit stuff I found out that we are inconsistent with the
way we parse /proc/*/stat files, so I added a new helper instead along with some
tests.  Unfortunately using it for the thing I wanted is not really viable in
the end, so it "violates" the Rule of three, but at least it does something
correctly.



Self-NACK, v2 coming up.


Martin Kletzander (2):
 util: Add virProcessGetStat
 Use virProcessGetStat

src/libvirt_linux.syms|   3 +
src/qemu/qemu_driver.c|  29 ++
src/util/virprocess.c | 126 +-
src/util/virprocess.h |   4 +
tests/meson.build |   1 +
tests/virprocessstatdata/complex/stat |   2 +
tests/virprocessstatdata/simple/stat  |   1 +
tests/virprocessstattest.c|  84 +
8 files changed, 185 insertions(+), 65 deletions(-)
create mode 100644 tests/virprocessstatdata/complex/stat
create mode 100644 tests/virprocessstatdata/simple/stat
create mode 100644 tests/virprocessstattest.c

--
2.34.0



signature.asc
Description: PGP signature


Re: [PATCH v1 12/12] target/riscv: Support virtual time context synchronization

2021-11-20 Thread Richard Henderson

On 11/20/21 8:46 AM, Yifei Jiang wrote:

  const VMStateDescription vmstate_riscv_cpu = {
  .name = "cpu",
  .version_id = 3,
  .minimum_version_id = 3,
+.post_load = cpu_post_load,
  .fields = (VMStateField[]) {
  VMSTATE_UINTTL_ARRAY(env.gpr, RISCVCPU, 32),
  VMSTATE_UINT64_ARRAY(env.fpr, RISCVCPU, 32),
@@ -211,6 +221,10 @@ const VMStateDescription vmstate_riscv_cpu = {
  VMSTATE_UINT64(env.mtohost, RISCVCPU),
  VMSTATE_UINT64(env.timecmp, RISCVCPU),
  
+VMSTATE_UINT64(env.kvm_timer_time, RISCVCPU),

+VMSTATE_UINT64(env.kvm_timer_compare, RISCVCPU),
+VMSTATE_UINT64(env.kvm_timer_state, RISCVCPU),
+
  VMSTATE_END_OF_LIST()
  },


Can't alter VMStateDescription.fields without bumping version.

If this is really kvm-only state, consider placing it into a subsection.  But I worry 
about kvm-only state because ideally we'd be able to migrate between tcg and kvm (if only 
for debugging).



r~



Re: [PATCH v1 03/12] target/riscv: Implement function kvm_arch_init_vcpu

2021-11-20 Thread Richard Henderson

On 11/20/21 8:46 AM, Yifei Jiang wrote:

+id = kvm_riscv_reg_id(env, KVM_REG_RISCV_CONFIG, 
KVM_REG_RISCV_CONFIG_REG(isa));
+ret = kvm_get_one_reg(cs, id, );
+if (ret) {
+return ret;
+}
+env->misa_mxl |= isa;


This doesn't look right.
I'm sure you meant

env->misa_ext = isa;


r~



Re: [PATCH v1 08/12] target/riscv: Handle KVM_EXIT_RISCV_SBI exit

2021-11-20 Thread Philippe Mathieu-Daudé
Hi,

On 11/20/21 08:46, Yifei Jiang wrote:
> Use char-fe to handle console sbi call, which implement early
> console io while apply 'earlycon=sbi' into kernel parameters.
> 
> Signed-off-by: Yifei Jiang 
> Signed-off-by: Mingwang Li 
> ---
>  target/riscv/kvm.c | 42 -
>  target/riscv/sbi_ecall_interface.h | 72 ++
>  2 files changed, 113 insertions(+), 1 deletion(-)
>  create mode 100644 target/riscv/sbi_ecall_interface.h
> 
> diff --git a/target/riscv/kvm.c b/target/riscv/kvm.c
> index 8da2648d1a..6d419ba02e 100644
> --- a/target/riscv/kvm.c
> +++ b/target/riscv/kvm.c
> @@ -38,6 +38,8 @@
>  #include "qemu/log.h"
>  #include "hw/loader.h"
>  #include "kvm_riscv.h"
> +#include "sbi_ecall_interface.h"
> +#include "chardev/char-fe.h"
>  
>  static uint64_t kvm_riscv_reg_id(CPURISCVState *env, uint64_t type, uint64_t 
> idx)
>  {
> @@ -440,9 +442,47 @@ bool kvm_arch_stop_on_emulation_error(CPUState *cs)
>  return true;
>  }
>  
> +static int kvm_riscv_handle_sbi(struct kvm_run *run)
> +{
> +int ret = 0;
> +unsigned char ch;
> +switch (run->riscv_sbi.extension_id) {
> +case SBI_EXT_0_1_CONSOLE_PUTCHAR:
> +ch = run->riscv_sbi.args[0];
> +qemu_chr_fe_write(serial_hd(0)->be, , sizeof(ch));
> +break;
> +case SBI_EXT_0_1_CONSOLE_GETCHAR:
> +ret = qemu_chr_fe_read_all(serial_hd(0)->be, , sizeof(ch));
> +if (ret == sizeof(ch)) {
> +run->riscv_sbi.args[0] = ch;
> +} else {
> +run->riscv_sbi.args[0] = -1;
> +}
> +break;

Shouldn't this code use the Semihosting Console API from
"semihosting/console.h" instead?



[PATCH v6 3/4] conf: introduce dirty_ring_size field

2021-11-20 Thread huangy81
From: Hyman Huang(黄勇) 

Introduce dirty_ring_size in struct "_virDomainDef" to hold
the ring size configured by the user, and pass dirty_ring_size
when building the qemu command line if the dirty ring feature is enabled.

Signed-off-by: Hyman Huang(黄勇) 
---
 src/conf/domain_conf.c  | 76 -
 src/conf/domain_conf.h  |  4 +++
 src/qemu/qemu_command.c |  3 ++
 3 files changed, 82 insertions(+), 1 deletion(-)

diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index deb32b3f6b..80a124557e 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -205,6 +205,7 @@ VIR_ENUM_IMPL(virDomainKVM,
   "hint-dedicated",
   "poll-control",
   "pv-ipi",
+  "dirty-ring",
 );
 
 VIR_ENUM_IMPL(virDomainXen,
@@ -4826,6 +4827,18 @@ virDomainDefPostParseMemtune(virDomainDef *def)
 }
 
 
+static void
+virDomainDefPostParseFeatures(virDomainDef *def)
+{
+if (def->features[VIR_DOMAIN_FEATURE_KVM] == VIR_TRISTATE_SWITCH_ON &&
+def->kvm_features[VIR_DOMAIN_KVM_DIRTY_RING] == VIR_TRISTATE_SWITCH_ON 
&&
+def->dirty_ring_size == 0) {
+/* set 4096 as default size if dirty ring size not configured */
+def->dirty_ring_size = 4096;
+}
+}
+
+
 static int
 virDomainDefAddConsoleCompat(virDomainDef *def)
 {
@@ -6062,6 +6075,8 @@ virDomainDefPostParseCommon(virDomainDef *def,
 
 virDomainDefPostParseMemtune(def);
 
+virDomainDefPostParseFeatures(def);
+
 if (virDomainDefRejectDuplicateControllers(def) < 0)
 return -1;
 
@@ -17566,8 +17581,10 @@ virDomainFeaturesHyperVDefParse(virDomainDef *def,
 
 static int
 virDomainFeaturesKVMDefParse(virDomainDef *def,
+xmlXPathContextPtr ctxt,
  xmlNodePtr node)
 {
+xmlNodePtr tmp_node = ctxt->node;
 def->features[VIR_DOMAIN_FEATURE_KVM] = VIR_TRISTATE_SWITCH_ON;
 
 node = xmlFirstElementChild(node);
@@ -17589,9 +17606,37 @@ virDomainFeaturesKVMDefParse(virDomainDef *def,
 
 def->kvm_features[feature] = value;
 
+/* dirty ring feature should parse size property */
+if ((virDomainKVM) feature == VIR_DOMAIN_KVM_DIRTY_RING) {
+if (((virDomainKVM) feature) == VIR_DOMAIN_KVM_DIRTY_RING &&
+  value == VIR_TRISTATE_SWITCH_ON) {
+ctxt->node = node;
+
+if (virXMLPropString(node, "size")) {
+if (virXPathUInt("string(./@size)", ctxt,
+ >dirty_ring_size) < 0) {
+virReportError(VIR_ERR_XML_ERROR, "%s",
+  _("invalid number of dirty ring size"));
+return -1;
+}
+
+if ((def->dirty_ring_size & (def->dirty_ring_size - 1)) != 
0 ||
+def->dirty_ring_size < 1024 ||
+def->dirty_ring_size > 65536) {
+virReportError(VIR_ERR_XML_ERROR, "%s",
+   _("dirty ring must be power of 2 "
+ "and ranges [1024, 65536]"));
+return -1;
+}
+}
+}
+}
+
 node = xmlNextElementSibling(node);
 }
 
+ctxt->node = tmp_node;
+
 return 0;
 }
 
@@ -17741,7 +17786,7 @@ virDomainFeaturesDefParse(virDomainDef *def,
 break;
 
 case VIR_DOMAIN_FEATURE_KVM:
-if (virDomainFeaturesKVMDefParse(def, nodes[i]) < 0)
+if (virDomainFeaturesKVMDefParse(def, ctxt, nodes[i]) < 0)
 return -1;
 break;
 
@@ -21836,7 +21881,27 @@ virDomainDefFeaturesCheckABIStability(virDomainDef 
*src,

virTristateSwitchTypeToString(dst->kvm_features[i]));
 return false;
 }
+break;
 
+case VIR_DOMAIN_KVM_DIRTY_RING:
+if (src->kvm_features[i] != dst->kvm_features[i]) {
+virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+   _("State of KVM feature '%s' differs: "
+ "source: '%s', destination: '%s'"),
+   virDomainKVMTypeToString(i),
+   
virTristateSwitchTypeToString(src->kvm_features[i]),
+   
virTristateSwitchTypeToString(dst->kvm_features[i]));
+return false;
+}
+
+if (src->dirty_ring_size != dst->dirty_ring_size) {
+virReportError(VIR_ERR_CONFIG_UNSUPPORTED,
+   _("dirty ring size of KVM feature '%s' 
differs: "
+ "source: '%d', destination: '%d'"),
+   virDomainKVMTypeToString(i),
+   src->dirty_ring_size, 

[PATCH v6 1/4] qemu_capabilities: introduce QEMU_CAPS_ACCEL

2021-11-20 Thread huangy81
From: Hyman Huang(黄勇) 

since the "-machine" option for accelerators is legacy, "-accel" option
may be a better mechanism. following are details:
https://lore.kernel.org/qemu-devel/3aa73987-40e8-3619-0723-9f17f7385...@redhat.com/

This patch introduces the QEMU_CAPS_ACCEL capability to tell whether we're dealing
with a new enough QEMU so that we can replace '-machine accel' with '-accel'.

there were two phases of -accel support in QEMU:
- 2.9 to 4.2: only one -accel option supported; specifying a fallback
couldn't be done with -accel and required the older "-machine accel=tcg:kvm"
instead.

- 5.0 or newer: multiple -accel options supported, e.g. "-accel tcg
-accel kvm"
and it would be possible to distinguish them, for example using QOM
properties.

however libvirt only ever specifies one accelerator so it makes no
difference.

Since the oldest QEMU supported by libvirt is qemu-2.11, carefully modify the
capability test cases for QEMU versions >= 2.11.0.

Signed-off-by: Hyman Huang(黄勇) 
---
 src/qemu/qemu_capabilities.c   | 2 ++
 src/qemu/qemu_capabilities.h   | 1 +
 tests/qemucapabilitiesdata/caps_2.11.0.s390x.xml   | 1 +
 tests/qemucapabilitiesdata/caps_2.11.0.x86_64.xml  | 1 +
 tests/qemucapabilitiesdata/caps_2.12.0.aarch64.xml | 1 +
 tests/qemucapabilitiesdata/caps_2.12.0.ppc64.xml   | 1 +
 tests/qemucapabilitiesdata/caps_2.12.0.s390x.xml   | 1 +
 tests/qemucapabilitiesdata/caps_2.12.0.x86_64.xml  | 1 +
 tests/qemucapabilitiesdata/caps_3.0.0.ppc64.xml| 1 +
 tests/qemucapabilitiesdata/caps_3.0.0.riscv32.xml  | 1 +
 tests/qemucapabilitiesdata/caps_3.0.0.riscv64.xml  | 1 +
 tests/qemucapabilitiesdata/caps_3.0.0.s390x.xml| 1 +
 tests/qemucapabilitiesdata/caps_3.0.0.x86_64.xml   | 1 +
 tests/qemucapabilitiesdata/caps_3.1.0.ppc64.xml| 1 +
 tests/qemucapabilitiesdata/caps_3.1.0.x86_64.xml   | 1 +
 tests/qemucapabilitiesdata/caps_4.0.0.aarch64.xml  | 1 +
 tests/qemucapabilitiesdata/caps_4.0.0.ppc64.xml| 1 +
 tests/qemucapabilitiesdata/caps_4.0.0.riscv32.xml  | 1 +
 tests/qemucapabilitiesdata/caps_4.0.0.riscv64.xml  | 1 +
 tests/qemucapabilitiesdata/caps_4.0.0.s390x.xml| 1 +
 tests/qemucapabilitiesdata/caps_4.0.0.x86_64.xml   | 1 +
 tests/qemucapabilitiesdata/caps_4.1.0.x86_64.xml   | 1 +
 tests/qemucapabilitiesdata/caps_4.2.0.aarch64.xml  | 1 +
 tests/qemucapabilitiesdata/caps_4.2.0.ppc64.xml| 1 +
 tests/qemucapabilitiesdata/caps_4.2.0.s390x.xml| 1 +
 tests/qemucapabilitiesdata/caps_4.2.0.x86_64.xml   | 1 +
 tests/qemucapabilitiesdata/caps_5.0.0.aarch64.xml  | 1 +
 tests/qemucapabilitiesdata/caps_5.0.0.ppc64.xml| 1 +
 tests/qemucapabilitiesdata/caps_5.0.0.riscv64.xml  | 1 +
 tests/qemucapabilitiesdata/caps_5.0.0.x86_64.xml   | 1 +
 tests/qemucapabilitiesdata/caps_5.1.0.sparc.xml| 1 +
 tests/qemucapabilitiesdata/caps_5.1.0.x86_64.xml   | 1 +
 tests/qemucapabilitiesdata/caps_5.2.0.aarch64.xml  | 1 +
 tests/qemucapabilitiesdata/caps_5.2.0.ppc64.xml| 1 +
 tests/qemucapabilitiesdata/caps_5.2.0.riscv64.xml  | 1 +
 tests/qemucapabilitiesdata/caps_5.2.0.s390x.xml| 1 +
 tests/qemucapabilitiesdata/caps_5.2.0.x86_64.xml   | 1 +
 tests/qemucapabilitiesdata/caps_6.0.0.aarch64.xml  | 1 +
 tests/qemucapabilitiesdata/caps_6.0.0.s390x.xml| 1 +
 tests/qemucapabilitiesdata/caps_6.0.0.x86_64.xml   | 1 +
 tests/qemucapabilitiesdata/caps_6.1.0.x86_64.xml   | 1 +
 tests/qemucapabilitiesdata/caps_6.2.0.aarch64.xml  | 1 +
 tests/qemucapabilitiesdata/caps_6.2.0.ppc64.xml| 1 +
 tests/qemucapabilitiesdata/caps_6.2.0.x86_64.xml   | 1 +
 44 files changed, 45 insertions(+)

diff --git a/src/qemu/qemu_capabilities.c b/src/qemu/qemu_capabilities.c
index b864f4d7df..4a6a53d88e 100644
--- a/src/qemu/qemu_capabilities.c
+++ b/src/qemu/qemu_capabilities.c
@@ -651,6 +651,7 @@ VIR_ENUM_IMPL(virQEMUCaps,
   "device.json", /* QEMU_CAPS_DEVICE_JSON */
   "query-dirty-rate", /* QEMU_CAPS_QUERY_DIRTY_RATE */
   "rbd-encryption", /* QEMU_CAPS_RBD_ENCRYPTION */
+  "accel", /* QEMU_CAPS_ACCEL */
 );
 
 
@@ -3203,6 +3204,7 @@ static struct virQEMUCapsCommandLineProps 
virQEMUCapsCommandLine[] = {
 { "spice", "rendernode", QEMU_CAPS_SPICE_RENDERNODE },
 { "vnc", "power-control", QEMU_CAPS_VNC_POWER_CONTROL },
 { "vnc", "audiodev", QEMU_CAPS_AUDIODEV },
+{ "accel", NULL, QEMU_CAPS_ACCEL },
 };
 
 static int
diff --git a/src/qemu/qemu_capabilities.h b/src/qemu/qemu_capabilities.h
index 61bdbdb2ac..b625e3910d 100644
--- a/src/qemu/qemu_capabilities.h
+++ b/src/qemu/qemu_capabilities.h
@@ -631,6 +631,7 @@ typedef enum { /* virQEMUCapsFlags grouping marker for 
syntax-check */
 QEMU_CAPS_DEVICE_JSON, /* -device accepts JSON */
 QEMU_CAPS_QUERY_DIRTY_RATE, /* accepts query-dirty-rate */
 QEMU_CAPS_RBD_ENCRYPTION, /* Ceph RBD encryption support */
+QEMU_CAPS_ACCEL, /* -accel */
 
 QEMU_CAPS_LAST /* this must always be the last item */
 } virQEMUCapsFlags;
diff --git 

[PATCH v6 2/4] qemu_command: switch accelerator option to new style

2021-11-20 Thread huangy81
From: Hyman Huang(黄勇) 

QEMU versions newer than 2.9.0 support the '-accel' option, so change
the way the command line is assembled from "accel=kvm" to "-accel kvm"
when specifying the accelerator.

Signed-off-by: Hyman Huang(黄勇) 
---
 src/qemu/qemu_command.c   | 31 ++-
 ...fault-cpu-kvm-virt-4.2.aarch64-latest.args |  3 +-
 .../aarch64-features-sve.aarch64-latest.args  |  3 +-
 .../channel-unix-guestfwd.x86_64-latest.args  |  3 +-
 .../clock-timer-armvtimer.aarch64-latest.args |  3 +-
 .../console-virtio-unix.x86_64-latest.args|  3 +-
 ...u-Icelake-Server-pconfig.x86_64-3.1.0.args |  3 +-
 ...-Icelake-Server-pconfig.x86_64-latest.args |  3 +-
 .../cpu-host-model.x86_64-2.11.0.args |  3 +-
 .../cpu-host-model.x86_64-2.12.0.args |  3 +-
 .../cpu-host-model.x86_64-3.0.0.args  |  3 +-
 .../cpu-host-model.x86_64-3.1.0.args  |  3 +-
 .../cpu-host-model.x86_64-4.0.0.args  |  3 +-
 .../cpu-host-model.x86_64-4.1.0.args  |  3 +-
 .../cpu-host-model.x86_64-4.2.0.args  |  3 +-
 .../cpu-host-model.x86_64-5.0.0.args  |  3 +-
 .../cpu-host-model.x86_64-5.1.0.args  |  3 +-
 .../cpu-host-model.x86_64-5.2.0.args  |  3 +-
 .../cpu-host-model.x86_64-6.0.0.args  |  3 +-
 .../cpu-host-model.x86_64-6.1.0.args  |  3 +-
 .../cpu-host-model.x86_64-latest.args |  3 +-
 .../cpu-tsc-frequency.x86_64-4.0.0.args   |  3 +-
 ...ult-video-type-aarch64.aarch64-latest.args |  3 +-
 ...default-video-type-ppc64.ppc64-latest.args |  3 +-
 ...default-video-type-s390x.s390x-latest.args |  3 +-
 .../disk-cdrom-network.x86_64-2.12.0.args |  3 +-
 .../disk-cdrom-network.x86_64-latest.args |  3 +-
 .../disk-network-http.x86_64-latest.args  |  3 +-
 .../disk-serial.x86_64-latest.args|  3 +-
 .../graphics-spice-timeout.x86_64-latest.args |  3 +-
 .../hugepages-memaccess3.x86_64-latest.args   |  3 +-
 .../intel-iommu-aw-bits.x86_64-latest.args|  3 +-
 ...ntel-iommu-caching-mode.x86_64-latest.args |  3 +-
 ...ntel-iommu-device-iotlb.x86_64-latest.args |  3 +-
 .../intel-iommu-eim.x86_64-latest.args|  3 +-
 .../launch-security-s390-pv.s390x-latest.args |  3 +-
 ...v-missing-platform-info.x86_64-2.12.0.args |  3 +-
 .../launch-security-sev.x86_64-2.12.0.args|  3 +-
 .../launch-security-sev.x86_64-6.0.0.args |  3 +-
 ...memory-default-hugepage.x86_64-latest.args |  3 +-
 .../memfd-memory-numa.x86_64-latest.args  |  3 +-
 ...mory-hotplug-virtio-mem.x86_64-latest.args |  3 +-
 ...mory-hotplug-virtio-pmem.x86_64-5.2.0.args |  3 +-
 ...ory-hotplug-virtio-pmem.x86_64-latest.args |  3 +-
 .../os-firmware-bios.x86_64-latest.args   |  3 +-
 ...re-efi-no-enrolled-keys.x86_64-latest.args |  3 +-
 ...os-firmware-efi-secboot.x86_64-latest.args |  3 +-
 .../os-firmware-efi.x86_64-latest.args|  3 +-
 .../parallel-unix-chardev.x86_64-latest.args  |  3 +-
 ...ault-cpu-kvm-pseries-2.7.ppc64-latest.args |  3 +-
 ...ault-cpu-kvm-pseries-3.1.ppc64-latest.args |  3 +-
 ...ault-cpu-kvm-pseries-4.2.ppc64-latest.args |  3 +-
 ...t-cpu-kvm-ccw-virtio-2.7.s390x-latest.args |  3 +-
 ...t-cpu-kvm-ccw-virtio-4.2.s390x-latest.args |  3 +-
 ...rtcard-passthrough-unix.x86_64-latest.args |  3 +-
 .../usb-redir-unix.x86_64-latest.args |  3 +-
 ...vhost-user-fs-fd-memory.x86_64-latest.args |  3 +-
 .../virtio-rng-builtin.x86_64-5.2.0.args  |  3 +-
 .../virtio-rng-builtin.x86_64-latest.args |  3 +-
 .../virtio-rng-egd-unix.x86_64-5.2.0.args |  3 +-
 .../virtio-rng-egd-unix.x86_64-latest.args|  3 +-
 ...-default-cpu-kvm-pc-4.2.x86_64-latest.args |  3 +-
 ...default-cpu-kvm-q35-4.2.x86_64-latest.args |  3 +-
 63 files changed, 154 insertions(+), 63 deletions(-)

diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
index 3108bdd581..a8f73c2d3e 100644
--- a/src/qemu/qemu_command.c
+++ b/src/qemu/qemu_command.c
@@ -7027,6 +7027,23 @@ qemuBuildNameCommandLine(virCommand *cmd,
 return 0;
 }
 
+static void
+qemuBuildAccelCommandLineKvmOptions(virCommand *cmd,
+const virDomainDef *def)
+{
+/*
+ * only handle the kvm case; the tcg case uses the legacy style.
+ * note that either kvm or tcg can be specified by libvirt,
+ * so do not worry about the conflict of specifying both
+ * */
+if ((virDomainVirtType)def->virtType == VIR_DOMAIN_VIRT_KVM) {
+g_auto(virBuffer) buf = VIR_BUFFER_INITIALIZER;
+virCommandAddArg(cmd, "-accel");
+virBufferAddLit(, "kvm");
+virCommandAddArgBuffer(cmd, );
+}
+}
+
 static int
 qemuBuildMachineCommandLine(virCommand *cmd,
 virQEMUDriverConfig *cfg,
@@ -7048,8 +7065,16 @@ qemuBuildMachineCommandLine(virCommand *cmd,
 virBufferAddLit(, ",accel=tcg");
 break;
 
+/*
+ * QEMU greater than 2.9.0 support '-accel', change the way of
+ * building commandline from "accel=kvm" to "-accel kvm", skip
+ * setting accel 

[PATCH v6 4/4] qemu: support dirty ring feature

2021-11-20 Thread huangy81
From: Hyman Huang(黄勇) 

The dirty ring feature was introduced in qemu-6.1. This patch adds a
corresponding feature named 'dirty-ring', which enables the dirty ring
feature when starting a vm.

To enable the feature, libvirt adds "-accel dirty-ring-size=xxx"
to the QEMU command line; the following XML needs to be added to
the guest's domain description:

<features>
   <kvm>
     <dirty-ring state='on' size='xxx'/>
   </kvm>
</features>

If property "state=on" is set but property "size" is not configured, the
default ring size is set to 4096.

Since the dirty ring can only be enabled by specifying the "-accel" option
and the legacy style does not support it, there seems to be no
other way to work around this, so we use the "-accel" option to specify the
accelerator instead of "-machine" when building the qemu command line.

details about the qemu "-accel" option:
https://lore.kernel.org/qemu-devel/3aa73987-40e8-3619-0723-9f17f7385...@redhat.com/

Signed-off-by: Hyman Huang(黄勇) 
---
 docs/formatdomain.rst | 18 ++
 docs/schemas/domaincommon.rng | 10 ++
 src/qemu/qemu_command.c   |  6 ++
 3 files changed, 26 insertions(+), 8 deletions(-)

diff --git a/docs/formatdomain.rst b/docs/formatdomain.rst
index eb8c973cf1..ea69b61c70 100644
--- a/docs/formatdomain.rst
+++ b/docs/formatdomain.rst
@@ -1843,6 +1843,7 @@ Hypervisors may allow certain CPU / machine features to 
be toggled on/off.



+   
  
  

@@ -1925,14 +1926,15 @@ are:
 ``kvm``
Various features to change the behavior of the KVM hypervisor.
 
-   == 
 
=== 
-   FeatureDescription  
Value   Since
-   == 
 
=== 
-   hidden Hide the KVM hypervisor from standard MSR based discovery
on, off :since:`1.2.8 (QEMU 2.1.0)`
-   hint-dedicated Allows a guest to enable optimizations when running on 
dedicated vCPUs   on, off :since:`5.7.0 (QEMU 2.12.0)`
-   poll-control   Decrease IO completion latency by introducing a grace period 
of busy waiting on, off :since:`6.10.0 (QEMU 4.2)`
-   pv-ipi Paravirtualized send IPIs
on, off :since:`7.10.0 (QEMU 3.1)`
-   == 
 
=== 
+   == 
 
== 

+   FeatureDescription  
Value  Since
+   == 
 
== 

+   hidden Hide the KVM hypervisor from standard MSR based discovery
on, off
:since:`1.2.8 (QEMU 2.1.0)`
+   hint-dedicated Allows a guest to enable optimizations when running on 
dedicated vCPUs   on, off
:since:`5.7.0 (QEMU 2.12.0)`
+   poll-control   Decrease IO completion latency by introducing a grace period 
of busy waiting on, off
:since:`6.10.0 (QEMU 4.2)`
+   pv-ipi Paravirtualized send IPIs
on, off
:since:`7.10.0 (QEMU 3.1)`
+   dirty-ring Enable dirty ring feature
on, off; size - must be power of 2, range [1024,65536] 
:since:`7.10.0 (QEMU 6.1)`
+   == 
 
== 

 
 ``xen``
Various features to change the behavior of the Xen hypervisor.
diff --git a/docs/schemas/domaincommon.rng b/docs/schemas/domaincommon.rng
index f01b7a6470..5f9fe3cc58 100644
--- a/docs/schemas/domaincommon.rng
+++ b/docs/schemas/domaincommon.rng
@@ -7212,6 +7212,16 @@
 
   
 
+
+  
+
+
+  
+
+  
+
+  
+
   
 
   
diff --git a/src/qemu/qemu_command.c b/src/qemu/qemu_command.c
index 145596d11a..863876bfae 100644
--- a/src/qemu/qemu_command.c
+++ b/src/qemu/qemu_command.c
@@ -7043,6 +7043,12 @@ qemuBuildAccelCommandLineKvmOptions(virCommand *cmd,
 g_auto(virBuffer) buf = VIR_BUFFER_INITIALIZER;
 virCommandAddArg(cmd, "-accel");
 virBufferAddLit(, "kvm");
+
+

[PATCH v6 0/4] Dirty Ring support (Libvirt)

2021-11-20 Thread huangy81
From: "Hyman Huang(黄勇)" 

Ping for this series.

I still keep thinking the dirty ring feature is something good to
have for libvirt.

qemu-6.1 supports the dirty ring feature and followed up with
commit 0e21bf24 "support dirtyrate at the granularity of vcpu",
which is a typical usage scenario of the dirty ring. Another usage
scenario may be the implementation of per-vcpu auto-converge during
live migration, which is already being reviewed. So we can make full
use of the dirty ring feature if libvirt supports it. Any corrections
and comments about this series would be much appreciated.

Please review, Thanks!

Hyman

v6
- rebase on master

v5,v4: blank, just make v6 be the latest version.

v3
- rebase on master and fix the conflict when applying
  "conf: introduce dirty_ring_size in struct "_virDomainDef"" to current
  master.

v2
- split patchset into 4 patches

- leave out the tcg case when building commandline. 

- handle the VIR_DOMAIN_KVM_DIRTY_RING case independently in ,
  virDomainFeatureDefParse and virDomainDefFeaturesCheckABIStability,
  do not integrate it with other cases...

- add dirty ring size check in virDomainDefFeaturesCheckABIStability

- modify zero checks on integers of dirty ring size in an explicit way.

- set the default value of dirty ring size in a post-parser callback.

- check the absence of kvm_feature in an explicit way.

- code clean of virTristateSwitchTypeToString function.

this version's modification base on Peter's advices mostly, thanks
a lot, please review !

v1
since qemu has introduced a dirty ring feature in 6.1.0, maybe it's
the right time to introduce dirty ring in libvirt as well.

this patch adds a feature named 'dirty-ring', which enables the dirty ring
feature when starting a vm. to try this out, three things have been done
in this patchset:

- introduce QEMU_CAPS_ACCEL so that libvirt can use it to select
  the right option when specifying the accelerator type.

- switch the option "-machine accel=xxx" to "-accel xxx" when specifying
  accelerator type once libvirt build QEMU command line, so that 
  dirty-ring-size property can be passed to qemu when start vm.

- introduce dirty_ring_size to hold the ring size configured by user
  and pass dirty_ring_size when building qemu commandline if dirty 
  ring feature enabled.

though dirty ring is per-cpu logically, the size of dirty ring is 
registered by 'struct kvm' in QEMU. so we would like to place the 
dirty_ring_size as a property of vm in Libvirt as the QEMU do.

the dirty ring feature is disabled by default, and if enabled, the
default value of the ring size is 4096 if size is not configured.

for more details about dirty ring and "-accel" option, please refer to:
https://lore.kernel.org/qemu-devel/20210108165050.406906-10-pet...@redhat.com/
https://lore.kernel.org/qemu-devel/3aa73987-40e8-3619-0723-9f17f7385...@redhat.com/

please review, Thanks!

Best Regards !

Hyman Huang(黄勇) (4):
  qemu_capabilities: introduce QEMU_CAPS_ACCEL
  qemu_command: switch accelerator option to new style
  conf: introduce dirty_ring_size field
  qemu: support dirty ring feature

 docs/formatdomain.rst | 18 +++--
 docs/schemas/domaincommon.rng | 10 +++
 src/conf/domain_conf.c| 76 ++-
 src/conf/domain_conf.h|  4 +
 src/qemu/qemu_capabilities.c  |  2 +
 src/qemu/qemu_capabilities.h  |  1 +
 src/qemu/qemu_command.c   | 40 +-
 .../caps_2.11.0.s390x.xml |  1 +
 .../caps_2.11.0.x86_64.xml|  1 +
 .../caps_2.12.0.aarch64.xml   |  1 +
 .../caps_2.12.0.ppc64.xml |  1 +
 .../caps_2.12.0.s390x.xml |  1 +
 .../caps_2.12.0.x86_64.xml|  1 +
 .../qemucapabilitiesdata/caps_3.0.0.ppc64.xml |  1 +
 .../caps_3.0.0.riscv32.xml|  1 +
 .../caps_3.0.0.riscv64.xml|  1 +
 .../qemucapabilitiesdata/caps_3.0.0.s390x.xml |  1 +
 .../caps_3.0.0.x86_64.xml |  1 +
 .../qemucapabilitiesdata/caps_3.1.0.ppc64.xml |  1 +
 .../caps_3.1.0.x86_64.xml |  1 +
 .../caps_4.0.0.aarch64.xml|  1 +
 .../qemucapabilitiesdata/caps_4.0.0.ppc64.xml |  1 +
 .../caps_4.0.0.riscv32.xml|  1 +
 .../caps_4.0.0.riscv64.xml|  1 +
 .../qemucapabilitiesdata/caps_4.0.0.s390x.xml |  1 +
 .../caps_4.0.0.x86_64.xml |  1 +
 .../caps_4.1.0.x86_64.xml |  1 +
 .../caps_4.2.0.aarch64.xml|  1 +
 .../qemucapabilitiesdata/caps_4.2.0.ppc64.xml |  1 +
 .../qemucapabilitiesdata/caps_4.2.0.s390x.xml |  1 +
 .../caps_4.2.0.x86_64.xml |  1 +
 .../caps_5.0.0.aarch64.xml|  1 +
 .../qemucapabilitiesdata/caps_5.0.0.ppc64.xml |  1 +
 .../caps_5.0.0.riscv64.xml|  1 +
 .../caps_5.0.0.x86_64.xml

[PATCH v1 07/12] target/riscv: Support setting external interrupt by KVM

2021-11-20 Thread Yifei Jiang
Extend riscv_cpu_update_mip() to support setting external interrupt
by KVM. It will call kvm_riscv_set_irq() to change the IRQ state in
the KVM module when kvm is enabled and the MIP_SEIP bit is set in "mask".

In addition, because target/riscv/cpu_helper.c is used for TCG, move
riscv_cpu_update_mip() from target/riscv/cpu_helper.c to target/riscv/cpu.c.

Signed-off-by: Yifei Jiang 
Signed-off-by: Mingwang Li 
Reviewed-by: Alistair Francis 
---
 target/riscv/cpu.c| 34 ++
 target/riscv/cpu_helper.c | 27 ---
 target/riscv/kvm-stub.c   |  5 +
 target/riscv/kvm.c| 20 
 target/riscv/kvm_riscv.h  |  1 +
 5 files changed, 60 insertions(+), 27 deletions(-)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index 1c944872a3..a464845c99 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -21,6 +21,7 @@
 #include "qemu/qemu-print.h"
 #include "qemu/ctype.h"
 #include "qemu/log.h"
+#include "qemu/main-loop.h"
 #include "cpu.h"
 #include "internals.h"
 #include "exec/exec-all.h"
@@ -131,6 +132,39 @@ static void set_feature(CPURISCVState *env, int feature)
 env->features |= (1ULL << feature);
 }
 
+#ifndef CONFIG_USER_ONLY
+uint32_t riscv_cpu_update_mip(RISCVCPU *cpu, uint32_t mask, uint32_t value)
+{
+CPURISCVState *env = >env;
+CPUState *cs = CPU(cpu);
+uint32_t old = env->mip;
+bool locked = false;
+
+if (!qemu_mutex_iothread_locked()) {
+locked = true;
+qemu_mutex_lock_iothread();
+}
+
+env->mip = (env->mip & ~mask) | (value & mask);
+
+if (kvm_enabled() && (mask & MIP_SEIP)) {
+kvm_riscv_set_irq(RISCV_CPU(cpu), IRQ_S_EXT, value & MIP_SEIP);
+}
+
+if (env->mip) {
+cpu_interrupt(cs, CPU_INTERRUPT_HARD);
+} else {
+cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD);
+}
+
+if (locked) {
+qemu_mutex_unlock_iothread();
+}
+
+return old;
+}
+#endif
+
 static void set_resetvec(CPURISCVState *env, target_ulong resetvec)
 {
 #ifndef CONFIG_USER_ONLY
diff --git a/target/riscv/cpu_helper.c b/target/riscv/cpu_helper.c
index 9eeed38c7e..5e36c35b15 100644
--- a/target/riscv/cpu_helper.c
+++ b/target/riscv/cpu_helper.c
@@ -286,33 +286,6 @@ int riscv_cpu_claim_interrupts(RISCVCPU *cpu, uint32_t 
interrupts)
 }
 }
 
-uint32_t riscv_cpu_update_mip(RISCVCPU *cpu, uint32_t mask, uint32_t value)
-{
-CPURISCVState *env = >env;
-CPUState *cs = CPU(cpu);
-uint32_t old = env->mip;
-bool locked = false;
-
-if (!qemu_mutex_iothread_locked()) {
-locked = true;
-qemu_mutex_lock_iothread();
-}
-
-env->mip = (env->mip & ~mask) | (value & mask);
-
-if (env->mip) {
-cpu_interrupt(cs, CPU_INTERRUPT_HARD);
-} else {
-cpu_reset_interrupt(cs, CPU_INTERRUPT_HARD);
-}
-
-if (locked) {
-qemu_mutex_unlock_iothread();
-}
-
-return old;
-}
-
 void riscv_cpu_set_rdtime_fn(CPURISCVState *env, uint64_t (*fn)(uint32_t),
  uint32_t arg)
 {
diff --git a/target/riscv/kvm-stub.c b/target/riscv/kvm-stub.c
index 39b96fe3f4..4e8fc31a21 100644
--- a/target/riscv/kvm-stub.c
+++ b/target/riscv/kvm-stub.c
@@ -23,3 +23,8 @@ void kvm_riscv_reset_vcpu(RISCVCPU *cpu)
 {
 abort();
 }
+
+void kvm_riscv_set_irq(RISCVCPU *cpu, int irq, int level)
+{
+abort();
+}
diff --git a/target/riscv/kvm.c b/target/riscv/kvm.c
index 7f3ffcc2b4..8da2648d1a 100644
--- a/target/riscv/kvm.c
+++ b/target/riscv/kvm.c
@@ -458,6 +458,26 @@ void kvm_riscv_reset_vcpu(RISCVCPU *cpu)
 env->satp = 0;
 }
 
+void kvm_riscv_set_irq(RISCVCPU *cpu, int irq, int level)
+{
+int ret;
+unsigned virq = level ? KVM_INTERRUPT_SET : KVM_INTERRUPT_UNSET;
+
+if (irq != IRQ_S_EXT) {
+return;
+}
+
+if (!kvm_enabled()) {
+return;
+}
+
+ret = kvm_vcpu_ioctl(CPU(cpu), KVM_INTERRUPT, );
+if (ret < 0) {
+perror("Set irq failed");
+abort();
+}
+}
+
 bool kvm_arch_cpu_check_are_resettable(void)
 {
 return true;
diff --git a/target/riscv/kvm_riscv.h b/target/riscv/kvm_riscv.h
index f38c82bf59..ed281bdce0 100644
--- a/target/riscv/kvm_riscv.h
+++ b/target/riscv/kvm_riscv.h
@@ -20,5 +20,6 @@
 #define QEMU_KVM_RISCV_H
 
 void kvm_riscv_reset_vcpu(RISCVCPU *cpu);
+void kvm_riscv_set_irq(RISCVCPU *cpu, int irq, int level);
 
 #endif
-- 
2.19.1



[PATCH v1 04/12] target/riscv: Implement kvm_arch_get_registers

2021-11-20 Thread Yifei Jiang
Get GPR, CSR and FP registers from kvm via the KVM_GET_ONE_REG ioctl.

Signed-off-by: Yifei Jiang 
Signed-off-by: Mingwang Li 
Reviewed-by: Alistair Francis 
---
 target/riscv/kvm.c | 150 -
 1 file changed, 149 insertions(+), 1 deletion(-)

diff --git a/target/riscv/kvm.c b/target/riscv/kvm.c
index 9f9692fb9e..b49c24be0a 100644
--- a/target/riscv/kvm.c
+++ b/target/riscv/kvm.c
@@ -55,13 +55,161 @@ static uint64_t kvm_riscv_reg_id(CPURISCVState *env, 
uint64_t type, uint64_t idx
 return id;
 }
 
+#define RISCV_CORE_REG(env, name)  kvm_riscv_reg_id(env, KVM_REG_RISCV_CORE, \
+ KVM_REG_RISCV_CORE_REG(name))
+
+#define RISCV_CSR_REG(env, name)  kvm_riscv_reg_id(env, KVM_REG_RISCV_CSR, \
+ KVM_REG_RISCV_CSR_REG(name))
+
+#define RISCV_FP_F_REG(env, idx)  kvm_riscv_reg_id(env, KVM_REG_RISCV_FP_F, 
idx)
+
+#define RISCV_FP_D_REG(env, idx)  kvm_riscv_reg_id(env, KVM_REG_RISCV_FP_D, 
idx)
+
+static int kvm_riscv_get_regs_core(CPUState *cs)
+{
+int ret = 0;
+int i;
+target_ulong reg;
+CPURISCVState *env = _CPU(cs)->env;
+
+ret = kvm_get_one_reg(cs, RISCV_CORE_REG(env, regs.pc), );
+if (ret) {
+return ret;
+}
+env->pc = reg;
+
+for (i = 1; i < 32; i++) {
+uint64_t id = kvm_riscv_reg_id(env, KVM_REG_RISCV_CORE, i);
+ret = kvm_get_one_reg(cs, id, );
+if (ret) {
+return ret;
+}
+env->gpr[i] = reg;
+}
+
+return ret;
+}
+
+static int kvm_riscv_get_regs_csr(CPUState *cs)
+{
+int ret = 0;
+target_ulong reg;
+CPURISCVState *env = _CPU(cs)->env;
+
+ret = kvm_get_one_reg(cs, RISCV_CSR_REG(env, sstatus), );
+if (ret) {
+return ret;
+}
+env->mstatus = reg;
+
+ret = kvm_get_one_reg(cs, RISCV_CSR_REG(env, sie), );
+if (ret) {
+return ret;
+}
+env->mie = reg;
+
+ret = kvm_get_one_reg(cs, RISCV_CSR_REG(env, stvec), );
+if (ret) {
+return ret;
+}
+env->stvec = reg;
+
+ret = kvm_get_one_reg(cs, RISCV_CSR_REG(env, sscratch), );
+if (ret) {
+return ret;
+}
+env->sscratch = reg;
+
+ret = kvm_get_one_reg(cs, RISCV_CSR_REG(env, sepc), );
+if (ret) {
+return ret;
+}
+env->sepc = reg;
+
+ret = kvm_get_one_reg(cs, RISCV_CSR_REG(env, scause), );
+if (ret) {
+return ret;
+}
+env->scause = reg;
+
+ret = kvm_get_one_reg(cs, RISCV_CSR_REG(env, stval), );
+if (ret) {
+return ret;
+}
+env->stval = reg;
+
+ret = kvm_get_one_reg(cs, RISCV_CSR_REG(env, sip), );
+if (ret) {
+return ret;
+}
+env->mip = reg;
+
+ret = kvm_get_one_reg(cs, RISCV_CSR_REG(env, satp), );
+if (ret) {
+return ret;
+}
+env->satp = reg;
+
+return ret;
+}
+
+static int kvm_riscv_get_regs_fp(CPUState *cs)
+{
+int ret = 0;
+int i;
+CPURISCVState *env = _CPU(cs)->env;
+
+if (riscv_has_ext(env, RVD)) {
+uint64_t reg;
+for (i = 0; i < 32; i++) {
+ret = kvm_get_one_reg(cs, RISCV_FP_D_REG(env, i), );
+if (ret) {
+return ret;
+}
+env->fpr[i] = reg;
+}
+return ret;
+}
+
+if (riscv_has_ext(env, RVF)) {
+uint32_t reg;
+for (i = 0; i < 32; i++) {
+ret = kvm_get_one_reg(cs, RISCV_FP_F_REG(env, i), );
+if (ret) {
+return ret;
+}
+env->fpr[i] = reg;
+}
+return ret;
+}
+
+return ret;
+}
+
 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
 KVM_CAP_LAST_INFO
 };
 
 int kvm_arch_get_registers(CPUState *cs)
 {
-return 0;
+int ret = 0;
+
+ret = kvm_riscv_get_regs_core(cs);
+if (ret) {
+return ret;
+}
+
+ret = kvm_riscv_get_regs_csr(cs);
+if (ret) {
+return ret;
+}
+
+ret = kvm_riscv_get_regs_fp(cs);
+if (ret) {
+return ret;
+}
+
+return ret;
 }
 
 int kvm_arch_put_registers(CPUState *cs, int level)
-- 
2.19.1



[PATCH v1 10/12] target/riscv: Add kvm_riscv_get/put_regs_timer

2021-11-20 Thread Yifei Jiang
Add kvm_riscv_get/put_regs_timer to synchronize virtual time context
from KVM.

Setting the RISCV_TIMER_REG(state) register causes an error from KVM
when kvm_timer_state == 0. It would be better to adapt this in KVM, but
adapting it in QEMU does no harm.

Signed-off-by: Yifei Jiang 
Signed-off-by: Mingwang Li 
---
 target/riscv/cpu.h |  6 
 target/riscv/kvm.c | 72 ++
 2 files changed, 78 insertions(+)

diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index e7dba35acb..dea49e53f0 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -259,6 +259,12 @@ struct CPURISCVState {
 
 hwaddr kernel_addr;
 hwaddr fdt_addr;
+
+/* kvm timer */
+bool kvm_timer_dirty;
+uint64_t kvm_timer_time;
+uint64_t kvm_timer_compare;
+uint64_t kvm_timer_state;
 };
 
 OBJECT_DECLARE_TYPE(RISCVCPU, RISCVCPUClass,
diff --git a/target/riscv/kvm.c b/target/riscv/kvm.c
index 6d419ba02e..e5725770f2 100644
--- a/target/riscv/kvm.c
+++ b/target/riscv/kvm.c
@@ -64,6 +64,9 @@ static uint64_t kvm_riscv_reg_id(CPURISCVState *env, uint64_t 
type, uint64_t idx
 #define RISCV_CSR_REG(env, name)  kvm_riscv_reg_id(env, KVM_REG_RISCV_CSR, \
  KVM_REG_RISCV_CSR_REG(name))
 
+#define RISCV_TIMER_REG(env, name)  kvm_riscv_reg_id(env, KVM_REG_RISCV_TIMER, 
\
+ KVM_REG_RISCV_TIMER_REG(name))
+
 #define RISCV_FP_F_REG(env, idx)  kvm_riscv_reg_id(env, KVM_REG_RISCV_FP_F, 
idx)
 
 #define RISCV_FP_D_REG(env, idx)  kvm_riscv_reg_id(env, KVM_REG_RISCV_FP_D, 
idx)
@@ -310,6 +313,75 @@ static int kvm_riscv_put_regs_fp(CPUState *cs)
 return ret;
 }
 
+static void kvm_riscv_get_regs_timer(CPUState *cs)
+{
+int ret;
+uint64_t reg;
+CPURISCVState *env = _CPU(cs)->env;
+
+if (env->kvm_timer_dirty) {
+return;
+}
+
+ret = kvm_get_one_reg(cs, RISCV_TIMER_REG(env, time), );
+if (ret) {
+abort();
+}
+env->kvm_timer_time = reg;
+
+ret = kvm_get_one_reg(cs, RISCV_TIMER_REG(env, compare), );
+if (ret) {
+abort();
+}
+env->kvm_timer_compare = reg;
+
+ret = kvm_get_one_reg(cs, RISCV_TIMER_REG(env, state), );
+if (ret) {
+abort();
+}
+env->kvm_timer_state = reg;
+
+env->kvm_timer_dirty = true;
+}
+
+static void kvm_riscv_put_regs_timer(CPUState *cs)
+{
+int ret;
+uint64_t reg;
+CPURISCVState *env = _CPU(cs)->env;
+
+if (!env->kvm_timer_dirty) {
+return;
+}
+
+reg = env->kvm_timer_time;
+ret = kvm_set_one_reg(cs, RISCV_TIMER_REG(env, time), );
+if (ret) {
+abort();
+}
+
+reg = env->kvm_timer_compare;
+ret = kvm_set_one_reg(cs, RISCV_TIMER_REG(env, compare), );
+if (ret) {
+abort();
+}
+
+/*
+ * Setting the RISCV_TIMER_REG(state) register causes an error from KVM
+ * when env->kvm_timer_state == 0. It would be better to adapt this in KVM,
+ * but adapting it in QEMU for now does no harm.
+ * TODO If KVM changes, adapt here.
+ */
+if (env->kvm_timer_state) {
+reg = env->kvm_timer_state;
+ret = kvm_set_one_reg(cs, RISCV_TIMER_REG(env, state), );
+if (ret) {
+abort();
+}
+}
+
+env->kvm_timer_dirty = false;
+}
 
 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
 KVM_CAP_LAST_INFO
-- 
2.19.1



[PATCH v1 08/12] target/riscv: Handle KVM_EXIT_RISCV_SBI exit

2021-11-20 Thread Yifei Jiang
Use char-fe to handle the console SBI calls, which implements early
console I/O when 'earlycon=sbi' is added to the kernel parameters.

Signed-off-by: Yifei Jiang 
Signed-off-by: Mingwang Li 
---
 target/riscv/kvm.c | 42 -
 target/riscv/sbi_ecall_interface.h | 72 ++
 2 files changed, 113 insertions(+), 1 deletion(-)
 create mode 100644 target/riscv/sbi_ecall_interface.h

diff --git a/target/riscv/kvm.c b/target/riscv/kvm.c
index 8da2648d1a..6d419ba02e 100644
--- a/target/riscv/kvm.c
+++ b/target/riscv/kvm.c
@@ -38,6 +38,8 @@
 #include "qemu/log.h"
 #include "hw/loader.h"
 #include "kvm_riscv.h"
+#include "sbi_ecall_interface.h"
+#include "chardev/char-fe.h"
 
 static uint64_t kvm_riscv_reg_id(CPURISCVState *env, uint64_t type, uint64_t 
idx)
 {
@@ -440,9 +442,47 @@ bool kvm_arch_stop_on_emulation_error(CPUState *cs)
 return true;
 }
 
+static int kvm_riscv_handle_sbi(struct kvm_run *run)
+{
+int ret = 0;
+unsigned char ch;
+switch (run->riscv_sbi.extension_id) {
+case SBI_EXT_0_1_CONSOLE_PUTCHAR:
+ch = run->riscv_sbi.args[0];
+qemu_chr_fe_write(serial_hd(0)->be, , sizeof(ch));
+break;
+case SBI_EXT_0_1_CONSOLE_GETCHAR:
+ret = qemu_chr_fe_read_all(serial_hd(0)->be, , sizeof(ch));
+if (ret == sizeof(ch)) {
+run->riscv_sbi.args[0] = ch;
+} else {
+run->riscv_sbi.args[0] = -1;
+}
+break;
+default:
+qemu_log_mask(LOG_UNIMP,
+  "%s: un-handled SBI EXIT, specific reasons is %lu\n",
+  __func__, run->riscv_sbi.extension_id);
+ret = -1;
+break;
+}
+return ret;
+}
+
 int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
 {
-return 0;
+int ret = 0;
+switch (run->exit_reason) {
+case KVM_EXIT_RISCV_SBI:
+ret = kvm_riscv_handle_sbi(run);
+break;
+default:
+qemu_log_mask(LOG_UNIMP, "%s: un-handled exit reason %d\n",
+  __func__, run->exit_reason);
+ret = -1;
+break;
+}
+return ret;
 }
 
 void kvm_riscv_reset_vcpu(RISCVCPU *cpu)
diff --git a/target/riscv/sbi_ecall_interface.h 
b/target/riscv/sbi_ecall_interface.h
new file mode 100644
index 00..fb1a3fa8f2
--- /dev/null
+++ b/target/riscv/sbi_ecall_interface.h
@@ -0,0 +1,72 @@
+/*
+ * SPDX-License-Identifier: BSD-2-Clause
+ *
+ * Copyright (c) 2019 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ *   Anup Patel 
+ */
+
+#ifndef __SBI_ECALL_INTERFACE_H__
+#define __SBI_ECALL_INTERFACE_H__
+
+/* clang-format off */
+
+/* SBI Extension IDs */
+#define SBI_EXT_0_1_SET_TIMER   0x0
+#define SBI_EXT_0_1_CONSOLE_PUTCHAR 0x1
+#define SBI_EXT_0_1_CONSOLE_GETCHAR 0x2
+#define SBI_EXT_0_1_CLEAR_IPI   0x3
+#define SBI_EXT_0_1_SEND_IPI0x4
+#define SBI_EXT_0_1_REMOTE_FENCE_I  0x5
+#define SBI_EXT_0_1_REMOTE_SFENCE_VMA   0x6
+#define SBI_EXT_0_1_REMOTE_SFENCE_VMA_ASID 0x7
+#define SBI_EXT_0_1_SHUTDOWN0x8
+#define SBI_EXT_BASE0x10
+#define SBI_EXT_TIME0x54494D45
+#define SBI_EXT_IPI 0x735049
+#define SBI_EXT_RFENCE  0x52464E43
+#define SBI_EXT_HSM 0x48534D
+
+/* SBI function IDs for BASE extension*/
+#define SBI_EXT_BASE_GET_SPEC_VERSION   0x0
+#define SBI_EXT_BASE_GET_IMP_ID 0x1
+#define SBI_EXT_BASE_GET_IMP_VERSION0x2
+#define SBI_EXT_BASE_PROBE_EXT  0x3
+#define SBI_EXT_BASE_GET_MVENDORID  0x4
+#define SBI_EXT_BASE_GET_MARCHID0x5
+#define SBI_EXT_BASE_GET_MIMPID 0x6
+
+/* SBI function IDs for TIME extension*/
+#define SBI_EXT_TIME_SET_TIMER  0x0
+
+/* SBI function IDs for IPI extension*/
+#define SBI_EXT_IPI_SEND_IPI0x0
+
+/* SBI function IDs for RFENCE extension*/
+#define SBI_EXT_RFENCE_REMOTE_FENCE_I   0x0
+#define SBI_EXT_RFENCE_REMOTE_SFENCE_VMA0x1
+#define SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID  0x2
+#define SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA   0x3
+#define SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA_VMID 0x4
+#define SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA   0x5
+#define SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID 0x6
+
+/* SBI function IDs for HSM extension */
+#define SBI_EXT_HSM_HART_START  0x0
+#define SBI_EXT_HSM_HART_STOP   0x1
+#define SBI_EXT_HSM_HART_GET_STATUS 0x2
+
+#define SBI_HSM_HART_STATUS_STARTED 0x0
+#define SBI_HSM_HART_STATUS_STOPPED 0x1
+#define SBI_HSM_HART_STATUS_START_PENDING   0x2
+#define SBI_HSM_HART_STATUS_STOP_PENDING0x3
+
+#define SBI_SPEC_VERSION_MAJOR_OFFSET   24
+#define SBI_SPEC_VERSION_MAJOR_MASK 0x7f
+#define SBI_SPEC_VERSION_MINOR_MASK 0xff
+#define SBI_EXT_VENDOR_START0x0900
+#define SBI_EXT_VENDOR_END  0x09FF
+/* clang-format on */
+
+#endif
-- 
2.19.1



[PATCH v1 11/12] target/riscv: Implement virtual time adjusting with vm state changing

2021-11-20 Thread Yifei Jiang
We want the virtual time to follow vm state changes. When a vm
is stopped, guest virtual time should stop counting and kvm_timer
should be stopped. When the vm is resumed, guest virtual time should
continue to count and kvm_timer should be restored.

Signed-off-by: Yifei Jiang 
Signed-off-by: Mingwang Li 
---
 target/riscv/kvm.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/target/riscv/kvm.c b/target/riscv/kvm.c
index e5725770f2..b2e14d579e 100644
--- a/target/riscv/kvm.c
+++ b/target/riscv/kvm.c
@@ -40,6 +40,7 @@
 #include "kvm_riscv.h"
 #include "sbi_ecall_interface.h"
 #include "chardev/char-fe.h"
+#include "sysemu/runstate.h"
 
 static uint64_t kvm_riscv_reg_id(CPURISCVState *env, uint64_t type, uint64_t 
idx)
 {
@@ -452,6 +453,17 @@ unsigned long kvm_arch_vcpu_id(CPUState *cpu)
 return cpu->cpu_index;
 }
 
+static void kvm_riscv_vm_state_change(void *opaque, bool running, RunState 
state)
+{
+CPUState *cs = opaque;
+
+if (running) {
+kvm_riscv_put_regs_timer(cs);
+} else {
+kvm_riscv_get_regs_timer(cs);
+}
+}
+
 void kvm_arch_init_irq_routing(KVMState *s)
 {
 }
@@ -464,6 +476,8 @@ int kvm_arch_init_vcpu(CPUState *cs)
 CPURISCVState *env = >env;
 uint64_t id;
 
+qemu_add_vm_change_state_handler(kvm_riscv_vm_state_change, cs);
+
 id = kvm_riscv_reg_id(env, KVM_REG_RISCV_CONFIG, 
KVM_REG_RISCV_CONFIG_REG(isa));
 ret = kvm_get_one_reg(cs, id, );
 if (ret) {
-- 
2.19.1



[PATCH v1 05/12] target/riscv: Implement kvm_arch_put_registers

2021-11-20 Thread Yifei Jiang
Put GPR, CSR and FP registers to kvm via the KVM_SET_ONE_REG ioctl.

Signed-off-by: Yifei Jiang 
Signed-off-by: Mingwang Li 
Reviewed-by: Alistair Francis 
---
 target/riscv/kvm.c | 141 -
 1 file changed, 140 insertions(+), 1 deletion(-)

diff --git a/target/riscv/kvm.c b/target/riscv/kvm.c
index b49c24be0a..5fe5ca4434 100644
--- a/target/riscv/kvm.c
+++ b/target/riscv/kvm.c
@@ -90,6 +90,31 @@ static int kvm_riscv_get_regs_core(CPUState *cs)
 return ret;
 }
 
+static int kvm_riscv_put_regs_core(CPUState *cs)
+{
+int ret = 0;
+int i;
+target_ulong reg;
+CPURISCVState *env = _CPU(cs)->env;
+
+reg = env->pc;
+ret = kvm_set_one_reg(cs, RISCV_CORE_REG(env, regs.pc), );
+if (ret) {
+return ret;
+}
+
+for (i = 1; i < 32; i++) {
+uint64_t id = kvm_riscv_reg_id(env, KVM_REG_RISCV_CORE, i);
+reg = env->gpr[i];
+ret = kvm_set_one_reg(cs, id, );
+if (ret) {
+return ret;
+}
+}
+
+return ret;
+}
+
 static int kvm_riscv_get_regs_csr(CPUState *cs)
 {
 int ret = 0;
@@ -153,6 +178,69 @@ static int kvm_riscv_get_regs_csr(CPUState *cs)
 return ret;
 }
 
+static int kvm_riscv_put_regs_csr(CPUState *cs)
+{
+int ret = 0;
+target_ulong reg;
+CPURISCVState *env = _CPU(cs)->env;
+
+reg = env->mstatus;
+ret = kvm_set_one_reg(cs, RISCV_CSR_REG(env, sstatus), );
+if (ret) {
+return ret;
+}
+
+reg = env->mie;
+ret = kvm_set_one_reg(cs, RISCV_CSR_REG(env, sie), );
+if (ret) {
+return ret;
+}
+
+reg = env->stvec;
+ret = kvm_set_one_reg(cs, RISCV_CSR_REG(env, stvec), );
+if (ret) {
+return ret;
+}
+
+reg = env->sscratch;
+ret = kvm_set_one_reg(cs, RISCV_CSR_REG(env, sscratch), );
+if (ret) {
+return ret;
+}
+
+reg = env->sepc;
+ret = kvm_set_one_reg(cs, RISCV_CSR_REG(env, sepc), );
+if (ret) {
+return ret;
+}
+
+reg = env->scause;
+ret = kvm_set_one_reg(cs, RISCV_CSR_REG(env, scause), );
+if (ret) {
+return ret;
+}
+
+reg = env->stval;
+ret = kvm_set_one_reg(cs, RISCV_CSR_REG(env, stval), );
+if (ret) {
+return ret;
+}
+
+reg = env->mip;
+ret = kvm_set_one_reg(cs, RISCV_CSR_REG(env, sip), );
+if (ret) {
+return ret;
+}
+
+reg = env->satp;
+ret = kvm_set_one_reg(cs, RISCV_CSR_REG(env, satp), );
+if (ret) {
+return ret;
+}
+
+return ret;
+}
+
 static int kvm_riscv_get_regs_fp(CPUState *cs)
 {
 int ret = 0;
@@ -186,6 +274,40 @@ static int kvm_riscv_get_regs_fp(CPUState *cs)
 return ret;
 }
 
+static int kvm_riscv_put_regs_fp(CPUState *cs)
+{
+int ret = 0;
+int i;
+CPURISCVState *env = &RISCV_CPU(cs)->env;
+
+if (riscv_has_ext(env, RVD)) {
+uint64_t reg;
+for (i = 0; i < 32; i++) {
+reg = env->fpr[i];
+ret = kvm_set_one_reg(cs, RISCV_FP_D_REG(env, i), &reg);
+if (ret) {
+return ret;
+}
+}
+return ret;
+}
+
+if (riscv_has_ext(env, RVF)) {
+uint32_t reg;
+for (i = 0; i < 32; i++) {
+reg = env->fpr[i];
+ret = kvm_set_one_reg(cs, RISCV_FP_F_REG(env, i), &reg);
+if (ret) {
+return ret;
+}
+}
+return ret;
+}
+
+return ret;
+}
+
+
 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
 KVM_CAP_LAST_INFO
 };
@@ -214,7 +336,24 @@ int kvm_arch_get_registers(CPUState *cs)
 
 int kvm_arch_put_registers(CPUState *cs, int level)
 {
-return 0;
+int ret = 0;
+
+ret = kvm_riscv_put_regs_core(cs);
+if (ret) {
+return ret;
+}
+
+ret = kvm_riscv_put_regs_csr(cs);
+if (ret) {
+return ret;
+}
+
+ret = kvm_riscv_put_regs_fp(cs);
+if (ret) {
+return ret;
+}
+
+return ret;
 }
 
 int kvm_arch_release_virq_post(int virq)
-- 
2.19.1



[PATCH v1 09/12] target/riscv: Add host cpu type

2021-11-20 Thread Yifei Jiang
The 'host' CPU type simply sets the ISA to RV32 or RV64; more ISA info
will be obtained from KVM in kvm_arch_init_vcpu().

Signed-off-by: Yifei Jiang 
Signed-off-by: Mingwang Li 
Reviewed-by: Alistair Francis 
---
 target/riscv/cpu.c | 15 +++
 target/riscv/cpu.h |  1 +
 2 files changed, 16 insertions(+)

diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index a464845c99..6512182c62 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -247,6 +247,18 @@ static void rv32_imafcu_nommu_cpu_init(Object *obj)
 }
 #endif
 
+#if defined(CONFIG_KVM)
+static void riscv_host_cpu_init(Object *obj)
+{
+CPURISCVState *env = &RISCV_CPU(obj)->env;
+#if defined(TARGET_RISCV32)
+set_misa(env, MXL_RV32, 0);
+#elif defined(TARGET_RISCV64)
+set_misa(env, MXL_RV64, 0);
+#endif
+}
+#endif
+
 static ObjectClass *riscv_cpu_class_by_name(const char *cpu_model)
 {
 ObjectClass *oc;
@@ -844,6 +856,9 @@ static const TypeInfo riscv_cpu_type_infos[] = {
 .class_init = riscv_cpu_class_init,
 },
 DEFINE_CPU(TYPE_RISCV_CPU_ANY,  riscv_any_cpu_init),
+#if defined(CONFIG_KVM)
+DEFINE_CPU(TYPE_RISCV_CPU_HOST, riscv_host_cpu_init),
+#endif
 #if defined(TARGET_RISCV32)
 DEFINE_CPU(TYPE_RISCV_CPU_BASE32,   rv32_base_cpu_init),
 DEFINE_CPU(TYPE_RISCV_CPU_IBEX, rv32_ibex_cpu_init),
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 2807eb1bcb..e7dba35acb 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -45,6 +45,7 @@
 #define TYPE_RISCV_CPU_SIFIVE_E51   RISCV_CPU_TYPE_NAME("sifive-e51")
 #define TYPE_RISCV_CPU_SIFIVE_U34   RISCV_CPU_TYPE_NAME("sifive-u34")
 #define TYPE_RISCV_CPU_SIFIVE_U54   RISCV_CPU_TYPE_NAME("sifive-u54")
+#define TYPE_RISCV_CPU_HOST RISCV_CPU_TYPE_NAME("host")
 
 #if defined(TARGET_RISCV32)
 # define TYPE_RISCV_CPU_BASE TYPE_RISCV_CPU_BASE32
-- 
2.19.1



[PATCH v1 12/12] target/riscv: Support virtual time context synchronization

2021-11-20 Thread Yifei Jiang
Add the virtual time context description to vmstate_riscv_cpu. After the CPU
state has been loaded, the virtual time context is updated to KVM.

Signed-off-by: Yifei Jiang 
Signed-off-by: Mingwang Li 
---
 target/riscv/machine.c | 14 ++
 1 file changed, 14 insertions(+)

diff --git a/target/riscv/machine.c b/target/riscv/machine.c
index ad8248ebfd..153215549b 100644
--- a/target/riscv/machine.c
+++ b/target/riscv/machine.c
@@ -164,10 +164,20 @@ static const VMStateDescription vmstate_pointermasking = {
 }
 };
 
+static int cpu_post_load(void *opaque, int version_id)
+{
+RISCVCPU *cpu = opaque;
+CPURISCVState *env = &cpu->env;
+
+env->kvm_timer_dirty = true;
+return 0;
+}
+
 const VMStateDescription vmstate_riscv_cpu = {
 .name = "cpu",
 .version_id = 3,
 .minimum_version_id = 3,
+.post_load = cpu_post_load,
 .fields = (VMStateField[]) {
 VMSTATE_UINTTL_ARRAY(env.gpr, RISCVCPU, 32),
 VMSTATE_UINT64_ARRAY(env.fpr, RISCVCPU, 32),
@@ -211,6 +221,10 @@ const VMStateDescription vmstate_riscv_cpu = {
 VMSTATE_UINT64(env.mtohost, RISCVCPU),
 VMSTATE_UINT64(env.timecmp, RISCVCPU),
 
+VMSTATE_UINT64(env.kvm_timer_time, RISCVCPU),
+VMSTATE_UINT64(env.kvm_timer_compare, RISCVCPU),
+VMSTATE_UINT64(env.kvm_timer_state, RISCVCPU),
+
 VMSTATE_END_OF_LIST()
 },
 .subsections = (const VMStateDescription * []) {
-- 
2.19.1



[PATCH v1 06/12] target/riscv: Support start kernel directly by KVM

2021-11-20 Thread Yifei Jiang
Get the kernel and fdt start addresses in virt.c, and pass them to KVM
when the CPU is reset. In addition, add kvm_riscv.h to hold the RISC-V
specific interface.

Signed-off-by: Yifei Jiang 
Signed-off-by: Mingwang Li 
Reviewed-by: Alistair Francis 
---
 hw/riscv/boot.c  | 11 +++
 hw/riscv/virt.c  |  7 +++
 include/hw/riscv/boot.h  |  1 +
 target/riscv/cpu.c   |  8 
 target/riscv/cpu.h   |  3 +++
 target/riscv/kvm-stub.c  | 25 +
 target/riscv/kvm.c   | 14 ++
 target/riscv/kvm_riscv.h | 24 
 target/riscv/meson.build |  2 +-
 9 files changed, 94 insertions(+), 1 deletion(-)
 create mode 100644 target/riscv/kvm-stub.c
 create mode 100644 target/riscv/kvm_riscv.h

diff --git a/hw/riscv/boot.c b/hw/riscv/boot.c
index 519fa455a1..00df6d7810 100644
--- a/hw/riscv/boot.c
+++ b/hw/riscv/boot.c
@@ -317,3 +317,14 @@ void riscv_setup_rom_reset_vec(MachineState *machine, 
RISCVHartArrayState *harts
 
 return;
 }
+
+void riscv_setup_direct_kernel(hwaddr kernel_addr, hwaddr fdt_addr)
+{
+CPUState *cs;
+
+for (cs = first_cpu; cs; cs = CPU_NEXT(cs)) {
+RISCVCPU *riscv_cpu = RISCV_CPU(cs);
+riscv_cpu->env.kernel_addr = kernel_addr;
+riscv_cpu->env.fdt_addr = fdt_addr;
+}
+}
diff --git a/hw/riscv/virt.c b/hw/riscv/virt.c
index 3af074148e..e3452b25e8 100644
--- a/hw/riscv/virt.c
+++ b/hw/riscv/virt.c
@@ -941,6 +941,13 @@ static void virt_machine_init(MachineState *machine)
   virt_memmap[VIRT_MROM].size, kernel_entry,
   fdt_load_addr, machine->fdt);
 
+/*
+ * Only direct boot kernel is currently supported for KVM VM,
+ * So here setup kernel start address and fdt address.
+ * TODO:Support firmware loading and integrate to TCG start
+ */
+riscv_setup_direct_kernel(kernel_entry, fdt_load_addr);
+
 /* SiFive Test MMIO device */
 sifive_test_create(memmap[VIRT_TEST].base);
 
diff --git a/include/hw/riscv/boot.h b/include/hw/riscv/boot.h
index baff11dd8a..5834c234aa 100644
--- a/include/hw/riscv/boot.h
+++ b/include/hw/riscv/boot.h
@@ -58,5 +58,6 @@ void riscv_rom_copy_firmware_info(MachineState *machine, 
hwaddr rom_base,
   hwaddr rom_size,
   uint32_t reset_vec_size,
   uint64_t kernel_entry);
+void riscv_setup_direct_kernel(hwaddr kernel_addr, hwaddr fdt_addr);
 
 #endif /* RISCV_BOOT_H */
diff --git a/target/riscv/cpu.c b/target/riscv/cpu.c
index f812998123..1c944872a3 100644
--- a/target/riscv/cpu.c
+++ b/target/riscv/cpu.c
@@ -29,6 +29,8 @@
 #include "hw/qdev-properties.h"
 #include "migration/vmstate.h"
 #include "fpu/softfloat-helpers.h"
+#include "sysemu/kvm.h"
+#include "kvm_riscv.h"
 
 /* RISC-V CPU definitions */
 
@@ -380,6 +382,12 @@ static void riscv_cpu_reset(DeviceState *dev)
 cs->exception_index = RISCV_EXCP_NONE;
 env->load_res = -1;
 set_default_nan_mode(1, &env->fp_status);
+
+#ifndef CONFIG_USER_ONLY
+if (kvm_enabled()) {
+kvm_riscv_reset_vcpu(cpu);
+}
+#endif
 }
 
 static void riscv_cpu_disas_set_info(CPUState *s, disassemble_info *info)
diff --git a/target/riscv/cpu.h b/target/riscv/cpu.h
index 0760c0af93..2807eb1bcb 100644
--- a/target/riscv/cpu.h
+++ b/target/riscv/cpu.h
@@ -255,6 +255,9 @@ struct CPURISCVState {
 
 /* Fields from here on are preserved across CPU reset. */
 QEMUTimer *timer; /* Internal timer */
+
+hwaddr kernel_addr;
+hwaddr fdt_addr;
 };
 
 OBJECT_DECLARE_TYPE(RISCVCPU, RISCVCPUClass,
diff --git a/target/riscv/kvm-stub.c b/target/riscv/kvm-stub.c
new file mode 100644
index 0000000000..39b96fe3f4
--- /dev/null
+++ b/target/riscv/kvm-stub.c
@@ -0,0 +1,25 @@
+/*
+ * QEMU KVM RISC-V specific function stubs
+ *
+ * Copyright (c) 2020 Huawei Technologies Co., Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#include "qemu/osdep.h"
+#include "cpu.h"
+#include "kvm_riscv.h"
+
+void kvm_riscv_reset_vcpu(RISCVCPU *cpu)
+{
+abort();
+}
diff --git a/target/riscv/kvm.c b/target/riscv/kvm.c
index 5fe5ca4434..7f3ffcc2b4 100644
--- a/target/riscv/kvm.c
+++ b/target/riscv/kvm.c
@@ -37,6 +37,7 @@
 #include "hw/irq.h"
 #include "qemu/log.h"
 #include "hw/loader.h"
+#include "kvm_riscv.h"
 
 static uint64_t kvm_riscv_reg_id(CPURISCVState *env, uint64_t type, uint64_t 
idx)
 {
@@ -444,6 

[PATCH v1 02/12] target/riscv: Add target/riscv/kvm.c to place the public kvm interface

2021-11-20 Thread Yifei Jiang
Add target/riscv/kvm.c to hold the kvm_arch_* functions needed by
kvm/kvm-all.c. Also add KVM support to the meson.build files.

Signed-off-by: Yifei Jiang 
Signed-off-by: Mingwang Li 
Reviewed-by: Alistair Francis 
---
 meson.build  |   2 +
 target/riscv/kvm.c   | 133 +++
 target/riscv/meson.build |   1 +
 3 files changed, 136 insertions(+)
 create mode 100644 target/riscv/kvm.c

diff --git a/meson.build b/meson.build
index 96de1a6ef9..ae35e76ea4 100644
--- a/meson.build
+++ b/meson.build
@@ -77,6 +77,8 @@ elif cpu in ['ppc', 'ppc64']
   kvm_targets = ['ppc-softmmu', 'ppc64-softmmu']
 elif cpu in ['mips', 'mips64']
   kvm_targets = ['mips-softmmu', 'mipsel-softmmu', 'mips64-softmmu', 
'mips64el-softmmu']
+elif cpu in ['riscv']
+  kvm_targets = ['riscv32-softmmu', 'riscv64-softmmu']
 else
   kvm_targets = []
 endif
diff --git a/target/riscv/kvm.c b/target/riscv/kvm.c
new file mode 100644
index 0000000000..687dd4b621
--- /dev/null
+++ b/target/riscv/kvm.c
@@ -0,0 +1,133 @@
+/*
+ * RISC-V implementation of KVM hooks
+ *
+ * Copyright (c) 2020 Huawei Technologies Co., Ltd
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2 or later, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include <sys/ioctl.h>
+
+#include <linux/kvm.h>
+
+#include "qemu-common.h"
+#include "qemu/timer.h"
+#include "qemu/error-report.h"
+#include "qemu/main-loop.h"
+#include "sysemu/sysemu.h"
+#include "sysemu/kvm.h"
+#include "sysemu/kvm_int.h"
+#include "cpu.h"
+#include "trace.h"
+#include "hw/pci/pci.h"
+#include "exec/memattrs.h"
+#include "exec/address-spaces.h"
+#include "hw/boards.h"
+#include "hw/irq.h"
+#include "qemu/log.h"
+#include "hw/loader.h"
+
+const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
+KVM_CAP_LAST_INFO
+};
+
+int kvm_arch_get_registers(CPUState *cs)
+{
+return 0;
+}
+
+int kvm_arch_put_registers(CPUState *cs, int level)
+{
+return 0;
+}
+
+int kvm_arch_release_virq_post(int virq)
+{
+return 0;
+}
+
+int kvm_arch_fixup_msi_route(struct kvm_irq_routing_entry *route,
+ uint64_t address, uint32_t data, PCIDevice *dev)
+{
+return 0;
+}
+
+int kvm_arch_destroy_vcpu(CPUState *cs)
+{
+return 0;
+}
+
+unsigned long kvm_arch_vcpu_id(CPUState *cpu)
+{
+return cpu->cpu_index;
+}
+
+void kvm_arch_init_irq_routing(KVMState *s)
+{
+}
+
+int kvm_arch_init_vcpu(CPUState *cs)
+{
+return 0;
+}
+
+int kvm_arch_msi_data_to_gsi(uint32_t data)
+{
+abort();
+}
+
+int kvm_arch_add_msi_route_post(struct kvm_irq_routing_entry *route,
+int vector, PCIDevice *dev)
+{
+return 0;
+}
+
+int kvm_arch_init(MachineState *ms, KVMState *s)
+{
+return 0;
+}
+
+int kvm_arch_irqchip_create(KVMState *s)
+{
+return 0;
+}
+
+int kvm_arch_process_async_events(CPUState *cs)
+{
+return 0;
+}
+
+void kvm_arch_pre_run(CPUState *cs, struct kvm_run *run)
+{
+}
+
+MemTxAttrs kvm_arch_post_run(CPUState *cs, struct kvm_run *run)
+{
+return MEMTXATTRS_UNSPECIFIED;
+}
+
+bool kvm_arch_stop_on_emulation_error(CPUState *cs)
+{
+return true;
+}
+
+int kvm_arch_handle_exit(CPUState *cs, struct kvm_run *run)
+{
+return 0;
+}
+
+bool kvm_arch_cpu_check_are_resettable(void)
+{
+return true;
+}
diff --git a/target/riscv/meson.build b/target/riscv/meson.build
index d5e0bc93ea..2faf08a941 100644
--- a/target/riscv/meson.build
+++ b/target/riscv/meson.build
@@ -19,6 +19,7 @@ riscv_ss.add(files(
   'bitmanip_helper.c',
   'translate.c',
 ))
+riscv_ss.add(when: 'CONFIG_KVM', if_true: files('kvm.c'))
 
 riscv_softmmu_ss = ss.source_set()
 riscv_softmmu_ss.add(files(
-- 
2.19.1



[PATCH v1 01/12] update-linux-headers: Add asm-riscv/kvm.h

2021-11-20 Thread Yifei Jiang
Add asm-riscv/kvm.h for RISC-V KVM, and update linux/kvm.h

Signed-off-by: Yifei Jiang 
Signed-off-by: Mingwang Li 
---
 linux-headers/asm-riscv/kvm.h | 128 ++
 linux-headers/linux/kvm.h |   8 +++
 2 files changed, 136 insertions(+)
 create mode 100644 linux-headers/asm-riscv/kvm.h

diff --git a/linux-headers/asm-riscv/kvm.h b/linux-headers/asm-riscv/kvm.h
new file mode 100644
index 0000000000..f808ad1ce5
--- /dev/null
+++ b/linux-headers/asm-riscv/kvm.h
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright (C) 2019 Western Digital Corporation or its affiliates.
+ *
+ * Authors:
+ * Anup Patel 
+ */
+
+#ifndef __LINUX_KVM_RISCV_H
+#define __LINUX_KVM_RISCV_H
+
+#ifndef __ASSEMBLY__
+
+#include <linux/types.h>
+#include <asm/ptrace.h>
+
+#define __KVM_HAVE_READONLY_MEM
+
+#define KVM_COALESCED_MMIO_PAGE_OFFSET 1
+
+#define KVM_INTERRUPT_SET  -1U
+#define KVM_INTERRUPT_UNSET -2U
+
+/* for KVM_GET_REGS and KVM_SET_REGS */
+struct kvm_regs {
+};
+
+/* for KVM_GET_FPU and KVM_SET_FPU */
+struct kvm_fpu {
+};
+
+/* KVM Debug exit structure */
+struct kvm_debug_exit_arch {
+};
+
+/* for KVM_SET_GUEST_DEBUG */
+struct kvm_guest_debug_arch {
+};
+
+/* definition of registers in kvm_run */
+struct kvm_sync_regs {
+};
+
+/* for KVM_GET_SREGS and KVM_SET_SREGS */
+struct kvm_sregs {
+};
+
+/* CONFIG registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */
+struct kvm_riscv_config {
+   unsigned long isa;
+};
+
+/* CORE registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */
+struct kvm_riscv_core {
+   struct user_regs_struct regs;
+   unsigned long mode;
+};
+
+/* Possible privilege modes for kvm_riscv_core */
+#define KVM_RISCV_MODE_S   1
+#define KVM_RISCV_MODE_U   0
+
+/* CSR registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */
+struct kvm_riscv_csr {
+   unsigned long sstatus;
+   unsigned long sie;
+   unsigned long stvec;
+   unsigned long sscratch;
+   unsigned long sepc;
+   unsigned long scause;
+   unsigned long stval;
+   unsigned long sip;
+   unsigned long satp;
+   unsigned long scounteren;
+};
+
+/* TIMER registers for KVM_GET_ONE_REG and KVM_SET_ONE_REG */
+struct kvm_riscv_timer {
+   __u64 frequency;
+   __u64 time;
+   __u64 compare;
+   __u64 state;
+};
+
+/* Possible states for kvm_riscv_timer */
+#define KVM_RISCV_TIMER_STATE_OFF  0
+#define KVM_RISCV_TIMER_STATE_ON   1
+
+#define KVM_REG_SIZE(id)   \
+   (1U << (((id) & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT))
+
+/* If you need to interpret the index values, here is the key: */
+#define KVM_REG_RISCV_TYPE_MASK 0x00000000FF000000
+#define KVM_REG_RISCV_TYPE_SHIFT   24
+
+/* Config registers are mapped as type 1 */
+#define KVM_REG_RISCV_CONFIG   (0x01 << KVM_REG_RISCV_TYPE_SHIFT)
+#define KVM_REG_RISCV_CONFIG_REG(name) \
+   (offsetof(struct kvm_riscv_config, name) / sizeof(unsigned long))
+
+/* Core registers are mapped as type 2 */
+#define KVM_REG_RISCV_CORE (0x02 << KVM_REG_RISCV_TYPE_SHIFT)
+#define KVM_REG_RISCV_CORE_REG(name)   \
+   (offsetof(struct kvm_riscv_core, name) / sizeof(unsigned long))
+
+/* Control and status registers are mapped as type 3 */
+#define KVM_REG_RISCV_CSR  (0x03 << KVM_REG_RISCV_TYPE_SHIFT)
+#define KVM_REG_RISCV_CSR_REG(name)\
+   (offsetof(struct kvm_riscv_csr, name) / sizeof(unsigned long))
+
+/* Timer registers are mapped as type 4 */
+#define KVM_REG_RISCV_TIMER (0x04 << KVM_REG_RISCV_TYPE_SHIFT)
+#define KVM_REG_RISCV_TIMER_REG(name)  \
+   (offsetof(struct kvm_riscv_timer, name) / sizeof(__u64))
+
+/* F extension registers are mapped as type 5 */
+#define KVM_REG_RISCV_FP_F (0x05 << KVM_REG_RISCV_TYPE_SHIFT)
+#define KVM_REG_RISCV_FP_F_REG(name)   \
+   (offsetof(struct __riscv_f_ext_state, name) / sizeof(__u32))
+
+/* D extension registers are mapped as type 6 */
+#define KVM_REG_RISCV_FP_D (0x06 << KVM_REG_RISCV_TYPE_SHIFT)
+#define KVM_REG_RISCV_FP_D_REG(name)   \
+   (offsetof(struct __riscv_d_ext_state, name) / sizeof(__u64))
+
+#endif
+
+#endif /* __LINUX_KVM_RISCV_H */
diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h
index bcaf66cc4d..5e290c3c3e 100644
--- a/linux-headers/linux/kvm.h
+++ b/linux-headers/linux/kvm.h
@@ -269,6 +269,7 @@ struct kvm_xen_exit {
 #define KVM_EXIT_AP_RESET_HOLD 32
 #define KVM_EXIT_X86_BUS_LOCK 33
 #define KVM_EXIT_XEN  34
+#define KVM_EXIT_RISCV_SBI 35
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 /* Emulate instruction failed. */
@@ -469,6 +470,13 @@ struct kvm_run {
} msr;
/* KVM_EXIT_XEN */
struct kvm_xen_exit xen;
+   /* KVM_EXIT_RISCV_SBI */
+   struct {
+   unsigned long extension_id;
+   unsigned long 

[PATCH v1 00/12] Add riscv kvm accel support

2021-11-20 Thread Yifei Jiang
This series adds both riscv32 and riscv64 kvm support, and implements
migration based on riscv.

Because RISC-V KVM has been merged into Linux master, this
series has changed from RFC to patch v1.

Several steps to use this:
1. Build emulation
$ ./configure --target-list=riscv64-softmmu
$ make -j$(nproc)

2. Build kernel

3. Build QEMU VM
Cross built in riscv toolchain.
$ PKG_CONFIG_LIBDIR=
$ export PKG_CONFIG_SYSROOT_DIR=
$ ./configure --target-list=riscv64-softmmu --enable-kvm \
--cross-prefix=riscv64-linux-gnu- --disable-libiscsi --disable-glusterfs \
--disable-libusb --disable-usb-redir --audio-drv-list= --disable-opengl \
--disable-libxml2
$ make -j$(nproc)

4. Start emulation
$ ./qemu-system-riscv64 -M virt -m 4096M -cpu rv64,x-h=true -nographic \
-name guest=riscv-hyp,debug-threads=on \
-smp 4 \
-bios ./fw_jump.bin \
-kernel ./Image \
-drive file=./hyp.img,format=raw,id=hd0 \
-device virtio-blk-device,drive=hd0 \
-append "root=/dev/vda rw console=ttyS0 earlycon=sbi"

5. Start KVM-accelerated QEMU VM in emulation
$ ./qemu-system-riscv64 -M virt,accel=kvm -m 1024M -cpu host -nographic \
-name guest=riscv-guset \
-smp 2 \
-bios none \
-kernel ./Image \
-drive file=./guest.img,format=raw,id=hd0 \
-device virtio-blk-device,drive=hd0 \
-append "root=/dev/vda rw console=ttyS0 earlycon=sbi"

Changes since RFC v6
- Rebase on recent commit 8627edfb3f1fca24a96a0954148885c3241c10f8
- Sync-up headers with Linux-5.16-rc1

Changes since RFC v5
- Rebase on QEMU v6.1.0-rc1 and kvm-riscv linux v19.
- Move kvm interrupt setting to riscv_cpu_update_mip().
- Replace __u64 with uint64_t.

Changes since RFC v4
- Rebase on QEMU v6.0.0-rc2 and kvm-riscv linux v17.
- Remove time scaling support, as the software solution is incomplete
  and would cause unacceptable performance degradation. We will post
  a better solution.
- Revise according to Alistair's review comments.
  - Remove compile time XLEN checks in kvm_riscv_reg_id
  - Surround TYPE_RISCV_CPU_HOST definition by CONFIG_KVM and share
it between RV32 and RV64.
  - Add kvm-stub.c to reduce unnecessary compilation checks.
  - Add riscv_setup_direct_kernel() to direct boot kernel for KVM.

Changes since RFC v3
- Rebase on QEMU v5.2.0-rc2 and kvm-riscv linux v15.
- Add time scaling support(New patches 13, 14 and 15).
- Fix the bug that guest vm can't reboot.

Changes since RFC v2
- Fix checkpatch error at target/riscv/sbi_ecall_interface.h.
- Add riscv migration support.

Changes since RFC v1
- Add separate SBI ecall interface header.
- Add riscv32 kvm accel support.

Yifei Jiang (12):
  update-linux-headers: Add asm-riscv/kvm.h
  target/riscv: Add target/riscv/kvm.c to place the public kvm interface
  target/riscv: Implement function kvm_arch_init_vcpu
  target/riscv: Implement kvm_arch_get_registers
  target/riscv: Implement kvm_arch_put_registers
  target/riscv: Support start kernel directly by KVM
  target/riscv: Support setting external interrupt by KVM
  target/riscv: Handle KVM_EXIT_RISCV_SBI exit
  target/riscv: Add host cpu type
  target/riscv: Add kvm_riscv_get/put_regs_timer
  target/riscv: Implement virtual time adjusting with vm state changing
  target/riscv: Support virtual time context synchronization

 hw/riscv/boot.c|  11 +
 hw/riscv/virt.c|   7 +
 include/hw/riscv/boot.h|   1 +
 linux-headers/asm-riscv/kvm.h  | 128 ++
 linux-headers/linux/kvm.h  |   8 +
 meson.build|   2 +
 target/riscv/cpu.c |  57 +++
 target/riscv/cpu.h |  10 +
 target/riscv/cpu_helper.c  |  27 --
 target/riscv/kvm-stub.c|  30 ++
 target/riscv/kvm.c | 610 +
 target/riscv/kvm_riscv.h   |  25 ++
 target/riscv/machine.c |  14 +
 target/riscv/meson.build   |   1 +
 target/riscv/sbi_ecall_interface.h |  72 
 15 files changed, 976 insertions(+), 27 deletions(-)
 create mode 100644 linux-headers/asm-riscv/kvm.h
 create mode 100644 target/riscv/kvm-stub.c
 create mode 100644 target/riscv/kvm.c
 create mode 100644 target/riscv/kvm_riscv.h
 create mode 100644 target/riscv/sbi_ecall_interface.h

-- 
2.19.1



[PATCH v1 03/12] target/riscv: Implement function kvm_arch_init_vcpu

2021-11-20 Thread Yifei Jiang
Get ISA info from KVM during KVM vCPU initialization.

Signed-off-by: Yifei Jiang 
Signed-off-by: Mingwang Li 
Reviewed-by: Alistair Francis 
---
 target/riscv/kvm.c | 32 +++-
 1 file changed, 31 insertions(+), 1 deletion(-)

diff --git a/target/riscv/kvm.c b/target/riscv/kvm.c
index 687dd4b621..9f9692fb9e 100644
--- a/target/riscv/kvm.c
+++ b/target/riscv/kvm.c
@@ -38,6 +38,23 @@
 #include "qemu/log.h"
 #include "hw/loader.h"
 
+static uint64_t kvm_riscv_reg_id(CPURISCVState *env, uint64_t type, uint64_t 
idx)
+{
+uint64_t id = KVM_REG_RISCV | type | idx;
+
+switch (riscv_cpu_mxl(env)) {
+case MXL_RV32:
+id |= KVM_REG_SIZE_U32;
+break;
+case MXL_RV64:
+id |= KVM_REG_SIZE_U64;
+break;
+default:
+g_assert_not_reached();
+}
+return id;
+}
+
 const KVMCapabilityInfo kvm_arch_required_capabilities[] = {
 KVM_CAP_LAST_INFO
 };
@@ -79,7 +96,20 @@ void kvm_arch_init_irq_routing(KVMState *s)
 
 int kvm_arch_init_vcpu(CPUState *cs)
 {
-return 0;
+int ret = 0;
+target_ulong isa;
+RISCVCPU *cpu = RISCV_CPU(cs);
+CPURISCVState *env = &cpu->env;
+uint64_t id;
+
+id = kvm_riscv_reg_id(env, KVM_REG_RISCV_CONFIG, 
KVM_REG_RISCV_CONFIG_REG(isa));
+ret = kvm_get_one_reg(cs, id, &isa);
+if (ret) {
+return ret;
+}
+env->misa_mxl |= isa;
+
+return ret;
 }
 
 int kvm_arch_msi_data_to_gsi(uint32_t data)
-- 
2.19.1