[PATCH v1] tools/ocaml: fix warnings

2024-03-27 Thread Edwin Török
Do not rely on the string values of the `Failure` exception,
 but use the `_opt` functions instead.

Signed-off-by: Edwin Török 
---
 tools/ocaml/xenstored/config.ml | 20 +++-
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/tools/ocaml/xenstored/config.ml b/tools/ocaml/xenstored/config.ml
index 95ef745a54..e0df236f73 100644
--- a/tools/ocaml/xenstored/config.ml
+++ b/tools/ocaml/xenstored/config.ml
@@ -83,25 +83,27 @@ let validate cf expected other =
   let err = ref [] in
   let append x = err := x :: !err in
   List.iter (fun (k, v) ->
+  let parse ~err_msg parser v f =
+match parser v with
+| None -> append (k, err_msg)
+| Some r -> f r
+  in
   try
 if not (List.mem_assoc k expected) then
   other k v
 else let ty = List.assoc k expected in
   match ty with
   | Unit f   -> f ()
-  | Bool f   -> f (bool_of_string v)
+  | Bool f   -> parse ~err_msg:"expect bool arg" 
bool_of_string_opt v f
   | String f -> f v
-  | Int f-> f (int_of_string v)
-  | Float f  -> f (float_of_string v)
-  | Set_bool r   -> r := (bool_of_string v)
+  | Int f-> parse ~err_msg:"expect int arg" int_of_string_opt 
v f
+  | Float f  -> parse ~err_msg:"expect float arg" 
float_of_string_opt v f
+  | Set_bool r   -> parse ~err_msg:"expect bool arg" 
bool_of_string_opt v (fun x -> r := x)
   | Set_string r -> r := v
-  | Set_int r-> r := int_of_string v
-  | Set_float r  -> r := (float_of_string v)
+  | Set_int r-> parse ~err_msg:"expect int arg" int_of_string_opt 
v (fun x -> r:= x)
+  | Set_float r  -> parse ~err_msg:"expect float arg" 
float_of_string_opt v (fun x -> r := x)
   with
   | Not_found -> append (k, "unknown key")
-  | Failure "int_of_string"   -> append (k, "expect int arg")
-  | Failure "bool_of_string"  -> append (k, "expect bool arg")
-  | Failure "float_of_string" -> append (k, "expect float arg")
   | exn   -> append (k, Printexc.to_string exn)
 ) cf;
   if !err != [] then raise (Error !err)
-- 
2.44.0




[PATCH v1 1/2] oxenstored: use Map instead of Hashtbl for quotas

2024-01-31 Thread Edwin Török
On a stress test running 1000 VMs, flamegraphs have shown that
`oxenstored` spends a large amount of time in `Hashtbl.copy` and the GC.

Hashtable complexity:
 * read/write: O(1) average
 * copy: O(domains) -- copying the entire table

Map complexity:
 * read/write: O(log n) worst case
 * copy: O(1) -- a word copy

We always perform at least one 'copy' when processing each xenstore
packet (regardless of whether it is a read-only operation, and whether
or not it is inside a transaction), so the actual complexity per packet is:
  * Hashtbl: O(domains)
  * Map: O(log domains)

Maps are the clear winner, and a better fit for the immutable xenstore
tree.

Signed-off-by: Edwin Török 
---
 tools/ocaml/xenstored/quota.ml | 65 ++
 1 file changed, 34 insertions(+), 31 deletions(-)

diff --git a/tools/ocaml/xenstored/quota.ml b/tools/ocaml/xenstored/quota.ml
index 300d78a50b..f6e28ecc6a 100644
--- a/tools/ocaml/xenstored/quota.ml
+++ b/tools/ocaml/xenstored/quota.ml
@@ -23,66 +23,69 @@ let activate = ref true
 let maxent = ref (1000)
 let maxsize = ref (2048)
 
+module Domid = struct
+  type t = Xenctrl.domid
+  let compare (a:t) (b:t) = compare a b
+end
+
+module DomidMap = Map.Make(Domid)
+
 type t = {
   maxent: int;   (* max entities per domU *)
   maxsize: int;  (* max size of data store in one node *)
-  cur: (Xenctrl.domid, int) Hashtbl.t; (* current domains quota *)
+  mutable cur: int DomidMap.t; (* current domains quota *)
 }
 
 let to_string quota domid =
-  if Hashtbl.mem quota.cur domid
-  then Printf.sprintf "dom%i quota: %i/%i" domid (Hashtbl.find quota.cur 
domid) quota.maxent
-  else Printf.sprintf "dom%i quota: not set" domid
+  try
+Printf.sprintf "dom%i quota: %i/%i" domid (DomidMap.find domid quota.cur) 
quota.maxent
+  with Not_found ->
+Printf.sprintf "dom%i quota: not set" domid
 
 let create () =
-  { maxent = !maxent; maxsize = !maxsize; cur = Hashtbl.create 100; }
+  { maxent = !maxent; maxsize = !maxsize; cur = DomidMap.empty; }
 
-let copy quota = { quota with cur = (Hashtbl.copy quota.cur) }
+let copy quota = { quota with cur = quota.cur }
 
-let del quota id = Hashtbl.remove quota.cur id
+let del quota id = { quota with cur = DomidMap.remove id quota.cur }
 
 let _check quota id size =
   if size > quota.maxsize then (
 warn "domain %u err create entry: data too big %d" id size;
 raise Data_too_big
   );
-  if id > 0 && Hashtbl.mem quota.cur id then
-let entry = Hashtbl.find quota.cur id in
+  if id > 0 then
+  try
+let entry = DomidMap.find id quota.cur in
 if entry >= quota.maxent then (
   warn "domain %u cannot create entry: quota reached" id;
   raise Limit_reached
 )
+  with Not_found -> ()
 
 let check quota id size =
   if !activate then
 _check quota id size
 
-let get_entry quota id = Hashtbl.find quota.cur id
+let find_or_zero quota_cur id =
+  try DomidMap.find id quota_cur with Not_found -> 0
 
-let set_entry quota id nb =
-  if nb = 0
-  then Hashtbl.remove quota.cur id
-  else begin
-if Hashtbl.mem quota.cur id then
-  Hashtbl.replace quota.cur id nb
-else
-  Hashtbl.add quota.cur id nb
-  end
+let update_entry quota_cur id diff =
+  let nb = diff + find_or_zero quota_cur id in
+  if nb = 0 then DomidMap.remove id quota_cur
+  else DomidMap.add id nb quota_cur
 
 let del_entry quota id =
-  try
-let nb = get_entry quota id in
-set_entry quota id (nb - 1)
-  with Not_found -> ()
+  quota.cur <- update_entry quota.cur id (-1)
 
 let add_entry quota id =
-  let nb = try get_entry quota id with Not_found -> 0 in
-  set_entry quota id (nb + 1)
-
-let add quota diff =
-  Hashtbl.iter (fun id nb -> set_entry quota id (get_entry quota id + nb)) 
diff.cur
+  quota.cur <- update_entry quota.cur id (+1)
 
 let merge orig_quota mod_quota dest_quota =
-  Hashtbl.iter (fun id nb -> let diff = nb - (try get_entry orig_quota id with 
Not_found -> 0) in
-  if diff <> 0 then
-set_entry dest_quota id ((try get_entry dest_quota id with Not_found 
-> 0) + diff)) mod_quota.cur
+  let fold_merge id nb dest =
+match nb - find_or_zero orig_quota.cur id with
+| 0 -> dest (* not modified *)
+| diff -> update_entry dest id diff (* update with [x=x+diff] *)
+  in
+  dest_quota.cur <- DomidMap.fold fold_merge mod_quota.cur dest_quota.cur
+  (* dest_quota = dest_quota + (mod_quota - orig_quota) *)
-- 
2.43.0




[PATCH v1 0/2] reduce oxenstored quota processing overhead under load

2024-01-31 Thread Edwin Török
A recent stress test with 1000 VMs has shown that oxenstored spends ~40% of 
time in Quota.copy,
even when processing read-only xenstore commands.
Use an immutable data structure instead.

I have tested this in the gitlab CI here: 
https://gitlab.com/xen-project/people/edwintorok/xen/-/pipelines/1158302827
For convenience the changes in this (and the other series I sent out) are also 
available as a git repository:
https://gitlab.com/xen-project/people/edwintorok/xen/-/compare/private%2Fedvint%2Fdune1x...private%2Fedvint%2Fno-hashtbl-dev?from_project_id=47263871&straight=false

I haven't yet measured the speedup, but thought to send out the patch for 
review early.

Edwin Török (2):
  oxenstored: use Map instead of Hashtbl for quotas
  oxenstored: make Quota.t pure

 tools/ocaml/xenstored/quota.ml | 65 ++
 tools/ocaml/xenstored/store.ml | 17 +
 2 files changed, 44 insertions(+), 38 deletions(-)

-- 
2.43.0




[PATCH v1 2/2] oxenstored: make Quota.t pure

2024-01-31 Thread Edwin Török
Now that we no longer have a hashtable inside we can make Quota.t pure,
and push the mutable update to its callers.
Store.t already had a mutable Quota.t field.

No functional change.

Signed-off-by: Edwin Török 
---
 tools/ocaml/xenstored/quota.ml |  8 
 tools/ocaml/xenstored/store.ml | 17 ++---
 2 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/tools/ocaml/xenstored/quota.ml b/tools/ocaml/xenstored/quota.ml
index f6e28ecc6a..1f652040d8 100644
--- a/tools/ocaml/xenstored/quota.ml
+++ b/tools/ocaml/xenstored/quota.ml
@@ -33,7 +33,7 @@ module DomidMap = Map.Make(Domid)
 type t = {
   maxent: int;   (* max entities per domU *)
   maxsize: int;  (* max size of data store in one node *)
-  mutable cur: int DomidMap.t; (* current domains quota *)
+  cur: int DomidMap.t; (* current domains quota *)
 }
 
 let to_string quota domid =
@@ -76,10 +76,10 @@ let update_entry quota_cur id diff =
   else DomidMap.add id nb quota_cur
 
 let del_entry quota id =
-  quota.cur <- update_entry quota.cur id (-1)
+  {quota with cur = update_entry quota.cur id (-1)}
 
 let add_entry quota id =
-  quota.cur <- update_entry quota.cur id (+1)
+  {quota with cur = update_entry quota.cur id (+1)}
 
 let merge orig_quota mod_quota dest_quota =
   let fold_merge id nb dest =
@@ -87,5 +87,5 @@ let merge orig_quota mod_quota dest_quota =
 | 0 -> dest (* not modified *)
 | diff -> update_entry dest id diff (* update with [x=x+diff] *)
   in
-  dest_quota.cur <- DomidMap.fold fold_merge mod_quota.cur dest_quota.cur
+  {dest_quota with cur = DomidMap.fold fold_merge mod_quota.cur dest_quota.cur}
   (* dest_quota = dest_quota + (mod_quota - orig_quota) *)
diff --git a/tools/ocaml/xenstored/store.ml b/tools/ocaml/xenstored/store.ml
index 38a4945372..9b8dd2812d 100644
--- a/tools/ocaml/xenstored/store.ml
+++ b/tools/ocaml/xenstored/store.ml
@@ -85,7 +85,9 @@ module Node = struct
   raise Define.Permission_denied;
 end
 
-  let rec recurse fct node = fct node; SymbolMap.iter (fun _ -> recurse fct) 
node.children
+  let rec recurse fct node acc =
+let acc = fct node acc in
+SymbolMap.fold (fun _ -> recurse fct) node.children acc
 
   (** [recurse_filter_map f tree] applies [f] on each node in the tree 
recursively,
   possibly removing some nodes.
@@ -408,7 +410,7 @@ let dump_buffer store = dump_store_buf store.root
 let set_node store path node orig_quota mod_quota =
   let root = Path.set_node store.root path node in
   store.root <- root;
-  Quota.merge orig_quota mod_quota store.quota
+  store.quota <- Quota.merge orig_quota mod_quota store.quota
 
 let write store perm path value =
   let node, existing = get_deepest_existing_node store path in
@@ -422,7 +424,7 @@ let write store perm path value =
   let root, node_created = path_write store perm path value in
   store.root <- root;
   if node_created
-  then Quota.add_entry store.quota owner
+  then store.quota <- Quota.add_entry store.quota owner
 
 let mkdir store perm path =
   let node, existing = get_deepest_existing_node store path in
@@ -431,7 +433,7 @@ let mkdir store perm path =
   if not (existing || (Perms.Connection.is_dom0 perm)) then Quota.check 
store.quota owner 0;
   store.root <- path_mkdir store perm path;
   if not existing then
-Quota.add_entry store.quota owner
+store.quota <- Quota.add_entry store.quota owner
 
 let rm store perm path =
   let rmed_node = Path.get_node store.root path in
@@ -439,7 +441,7 @@ let rm store perm path =
   | None -> raise Define.Doesnt_exist
   | Some rmed_node ->
 store.root <- path_rm store perm path;
-Node.recurse (fun node -> Quota.del_entry store.quota (Node.get_owner 
node)) rmed_node
+store.quota <- Node.recurse (fun node quota -> Quota.del_entry quota 
(Node.get_owner node)) rmed_node store.quota
 
 let setperms store perm path nperms =
   match Path.get_node store.root path with
@@ -450,8 +452,9 @@ let setperms store perm path nperms =
 if not ((old_owner = new_owner) || (Perms.Connection.is_dom0 perm)) then
   raise Define.Permission_denied;
 store.root <- path_setperms store perm path nperms;
-Quota.del_entry store.quota old_owner;
-Quota.add_entry store.quota new_owner
+store.quota <-
+  let quota = Quota.del_entry store.quota old_owner in
+  Quota.add_entry quota new_owner
 
 let reset_permissions store domid =
   Logging.info "store|node" "Cleaning up xenstore ACLs for domid %d" domid;
-- 
2.43.0




[PATCH v1 1/2] oxenstored: fix build on OCaml 5.x

2024-01-31 Thread Edwin Török
Char.lowercase got removed in OCaml 5.0 (it has been deprecated since 2014).

Char.lowercase_ascii has existed since OCaml 4.03, so that is the new
minimum version for oxenstored.

(Given the choice between supporting a new release and dropping support
for an 8y+ old release, we drop support for OCaml <4.03)

Signed-off-by: Edwin Török 
---
 tools/configure   | 2 +-
 tools/configure.ac| 2 +-
 tools/ocaml/xenstored/disk.ml | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/configure b/tools/configure
index 0135a0059a..5723efaa56 100755
--- a/tools/configure
+++ b/tools/configure
@@ -6836,7 +6836,7 @@ else
  -e 's/[^0-9]//g'`
 
 
-  ax_compare_version_B=`echo "4.02.0" | sed -e 's/\([0-9]*\)/Z\1Z/g' \
+  ax_compare_version_B=`echo "4.03.0" | sed -e 's/\([0-9]*\)/Z\1Z/g' \
  -e 's/Z\([0-9]\)Z/Z0\1Z/g' \
  -e 's/Z\([0-9][0-9]\)Z/Z0\1Z/g' \
  -e 's/Z\([0-9][0-9][0-9]\)Z/Z0\1Z/g' \
diff --git a/tools/configure.ac b/tools/configure.ac
index 618ef8c63f..c979c3de7c 100644
--- a/tools/configure.ac
+++ b/tools/configure.ac
@@ -336,7 +336,7 @@ AS_IF([test "x$ocamltools" = "xy"], [
 AC_MSG_ERROR([Ocaml tools enabled, but missing ocamlopt or 
ocamlfind])])
 ocamltools="n"
 ], [
-AX_COMPARE_VERSION([$OCAMLVERSION], [lt], [4.02.0], [
+AX_COMPARE_VERSION([$OCAMLVERSION], [lt], [4.03.0], [
 AS_IF([test "x$enable_ocamltools" = "xyes"], [
 AC_MSG_ERROR([Your version of OCaml: $OCAMLVERSION is not 
supported])])
 ocamltools="n"
diff --git a/tools/ocaml/xenstored/disk.ml b/tools/ocaml/xenstored/disk.ml
index 91f945f2bd..ccaa048faf 100644
--- a/tools/ocaml/xenstored/disk.ml
+++ b/tools/ocaml/xenstored/disk.ml
@@ -30,7 +30,7 @@ let undec c =
   | _  -> raise (Failure "undecify")
 
 let unhex c =
-  let c = Char.lowercase c in
+  let c = Char.lowercase_ascii c in
   match c with
   | '0' .. '9' -> (Char.code c) - (Char.code '0')
   | 'a' .. 'f' -> (Char.code c) - (Char.code 'a') + 10
-- 
2.43.0




[PATCH v1 0/2] tools/ocaml: support OCaml 5.x, drop support for <4.05

2024-01-31 Thread Edwin Török
Fix building oxenstored with OCaml 5.x.
OCaml 5.x has removed some functions that have been deprecated for many years;
in order to support OCaml 5.x we need to drop support for OCaml 4.02.

Tested in gitlab CI (together with my other series):
https://gitlab.com/xen-project/people/edwintorok/xen/-/pipelines/1158302827

Edwin Török (2):
  oxenstored: fix build on OCaml 5.x
  tools/ocaml: bump minimum version to OCaml 4.05

 tools/configure   | 2 +-
 tools/configure.ac| 2 +-
 tools/ocaml/xenstored/disk.ml | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

-- 
2.43.0




[PATCH v1 2/2] tools/ocaml: bump minimum version to OCaml 4.05

2024-01-31 Thread Edwin Török
We tried bumping to 4.06.1 [1] previously, but OSSTest was holding us
back.
So bump to OCaml 4.05 instead, which should match the version on
OSSTest?

[1]: 
https://patchwork.kernel.org/project/xen-devel/patch/ac885ce2b63159d26d857dc3e53cf8aa63ae3646.1659118200.git.edvin.to...@citrix.com/

Signed-off-by: Edwin Török 
---
 tools/configure| 2 +-
 tools/configure.ac | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/configure b/tools/configure
index 5723efaa56..3d557234b3 100755
--- a/tools/configure
+++ b/tools/configure
@@ -6836,7 +6836,7 @@ else
  -e 's/[^0-9]//g'`
 
 
-  ax_compare_version_B=`echo "4.03.0" | sed -e 's/\([0-9]*\)/Z\1Z/g' \
+  ax_compare_version_B=`echo "4.05.0" | sed -e 's/\([0-9]*\)/Z\1Z/g' \
  -e 's/Z\([0-9]\)Z/Z0\1Z/g' \
  -e 's/Z\([0-9][0-9]\)Z/Z0\1Z/g' \
  -e 's/Z\([0-9][0-9][0-9]\)Z/Z0\1Z/g' \
diff --git a/tools/configure.ac b/tools/configure.ac
index c979c3de7c..851887080c 100644
--- a/tools/configure.ac
+++ b/tools/configure.ac
@@ -336,7 +336,7 @@ AS_IF([test "x$ocamltools" = "xy"], [
 AC_MSG_ERROR([Ocaml tools enabled, but missing ocamlopt or 
ocamlfind])])
 ocamltools="n"
 ], [
-AX_COMPARE_VERSION([$OCAMLVERSION], [lt], [4.03.0], [
+AX_COMPARE_VERSION([$OCAMLVERSION], [lt], [4.05.0], [
 AS_IF([test "x$enable_ocamltools" = "xyes"], [
 AC_MSG_ERROR([Your version of OCaml: $OCAMLVERSION is not 
supported])])
 ocamltools="n"
-- 
2.43.0




[RFC PATCH 20/22] x86/PMUv5: limit available fixed PMCs and enable support

2023-10-25 Thread Edwin Török
From: Edwin Török 

AnyThread deprecation means a bit in 0xa edx, which we pass through.
(we could also avoid doing the anythread masking, but we need that
 for version <= 4 support).

Fixed Counter enumeration means we need to limit fixed counters if we
hide any.

Domain separation needs no action from the hypervisor AFAICT.

Signed-off-by: Edwin Török 
---
 xen/arch/x86/cpuid.c| 3 ++-
 xen/arch/x86/include/asm/vpmu.h | 2 +-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/xen/arch/x86/cpuid.c b/xen/arch/x86/cpuid.c
index 12e768ae87..8900943bcd 100644
--- a/xen/arch/x86/cpuid.c
+++ b/xen/arch/x86/cpuid.c
@@ -338,7 +338,8 @@ void guest_cpuid(const struct vcpu *v, uint32_t leaf,
 res->a = u.eax;
 
 /* We only implement 3 fixed function counters */
-if ( (res->d & 0x1f) > fixed_pmc_cnt )
+res->c &= ~((1 << fixed_pmc_cnt) - 1);
+if ( (res->d & 0x1f) > fixed_pmc_cnt)
 res->d = (res->d & ~0x1f) | fixed_pmc_cnt;
 }
 break;
diff --git a/xen/arch/x86/include/asm/vpmu.h b/xen/arch/x86/include/asm/vpmu.h
index eaededadb5..f066b17e45 100644
--- a/xen/arch/x86/include/asm/vpmu.h
+++ b/xen/arch/x86/include/asm/vpmu.h
@@ -74,7 +74,7 @@ struct vpmu_struct {
 #define VPMU_CPU_HAS_DS 0x1000 /* Has Debug Store */
 #define VPMU_CPU_HAS_BTS0x2000 /* Has Branch Trace Store */
 
-#define VPMU_VERSION_MAX0x4
+#define VPMU_VERSION_MAX0x5
 #define VPMU_VERSION_MAX_SUPPORTED  0x5
 
 static inline void vpmu_set(struct vpmu_struct *vpmu, const u32 mask)
-- 
2.41.0




[RFC PATCH 22/22] x86/AMD: add IRPerf support

2023-10-25 Thread Edwin Török
From: Edwin Török 

Instruction retired perf counter, enabled by writing to a bit in HWCR.

Signed-off-by: Edwin Török 
---
 xen/arch/x86/include/asm/msr-index.h| 1 +
 xen/arch/x86/msr.c  | 7 +++
 xen/include/public/arch-x86/cpufeatureset.h | 2 +-
 3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/xen/arch/x86/include/asm/msr-index.h 
b/xen/arch/x86/include/asm/msr-index.h
index 061b07c7ae..1d94fe3a5b 100644
--- a/xen/arch/x86/include/asm/msr-index.h
+++ b/xen/arch/x86/include/asm/msr-index.h
@@ -393,6 +393,7 @@
 
 #define MSR_K8_HWCR0xc0010015
 #define K8_HWCR_TSC_FREQ_SEL   (1ULL << 24)
+#define K8_HWCR_IRPERF_EN  (1ULL << 30)
 #define K8_HWCR_CPUID_USER_DIS (1ULL << 35)
 
 #define MSR_K7_FID_VID_CTL 0xc0010041
diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c
index 483b5e4f70..b3cd851d9d 100644
--- a/xen/arch/x86/msr.c
+++ b/xen/arch/x86/msr.c
@@ -584,6 +584,13 @@ int guest_wrmsr(struct vcpu *v, uint32_t msr, uint64_t val)
 }
 break;
 
+case MSR_K8_HWCR:
+if ( !(cp->x86_vendor & X86_VENDOR_AMD) ||
+ (val & ~K8_HWCR_IRPERF_EN) ||
+ wrmsr_safe(msr, val) != 0 )
+goto gp_fault;
+break;
+
 case MSR_AMD64_DE_CFG:
 /*
  * OpenBSD 6.7 will panic if writing to DE_CFG triggers a #GP:
diff --git a/xen/include/public/arch-x86/cpufeatureset.h 
b/xen/include/public/arch-x86/cpufeatureset.h
index 5faca0bf7a..40f74cd5e8 100644
--- a/xen/include/public/arch-x86/cpufeatureset.h
+++ b/xen/include/public/arch-x86/cpufeatureset.h
@@ -241,7 +241,7 @@ XEN_CPUFEATURE(EFRO,  7*32+10) /*   APERF/MPERF 
Read Only interface */
 
 /* AMD-defined CPU features, CPUID level 0x8008.ebx, word 8 */
 XEN_CPUFEATURE(CLZERO,8*32+ 0) /*A  CLZERO instruction */
-XEN_CPUFEATURE(IRPERF,8*32+ 1) /* Instruction Retired Performance 
Counter */
+XEN_CPUFEATURE(IRPERF,8*32+ 1) /*A! Instruction Retired Performance 
Counter */
 XEN_CPUFEATURE(RSTR_FP_ERR_PTRS, 8*32+ 2) /*A  (F)X{SAVE,RSTOR} always 
saves/restores FPU Error pointers */
 XEN_CPUFEATURE(WBNOINVD,  8*32+ 9) /*   WBNOINVD instruction */
 XEN_CPUFEATURE(IBPB,  8*32+12) /*A  IBPB support only (no IBRS, used 
by AMD) */
-- 
2.41.0




[RFC PATCH 18/22] x86/PMUv4: disable intercept for PERF_GLOBAL_STATUS

2023-10-25 Thread Edwin Török
From: Edwin Török 

Now that we have a way to set PERF_GLOBAL_STATUS by writing to
PERF_GLOBAL_STATUS_RESET (== PERF_GLOBAL_OVF_CTRL) and
PERF_GLOBAL_STATUS_SET we do not need to intercept this MSR anymore.

We can save/restore its state when saving/loading vPMU state, and
otherwise let the guest read it directly.

This is an optimization; perhaps it would need a flag to disable it for
debugging purposes.

Signed-off-by: Edwin Török 
---
 xen/arch/x86/cpu/vpmu_intel.c | 19 ++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/xen/arch/x86/cpu/vpmu_intel.c b/xen/arch/x86/cpu/vpmu_intel.c
index 5e660af395..59d0b2ca36 100644
--- a/xen/arch/x86/cpu/vpmu_intel.c
+++ b/xen/arch/x86/cpu/vpmu_intel.c
@@ -67,6 +67,7 @@ static bool_t __read_mostly full_width_write;
 
 /* Number of general-purpose and fixed performance counters */
 unsigned int __read_mostly arch_pmc_cnt, fixed_pmc_cnt;
+static unsigned int __read_mostly vpmu_version;
 
 /* Masks used for testing whether and MSR is valid */
 #define ARCH_CTRL_MASK  (~((1ull << 32) - 1) | (1ull << 21) | 
ARCH_CNTR_PIN_CONTROL)
@@ -228,6 +229,9 @@ static void core2_vpmu_set_msr_bitmap(struct vcpu *v)
 
 vmx_clear_msr_intercept(v, MSR_CORE_PERF_FIXED_CTR_CTRL, VMX_MSR_R);
 vmx_clear_msr_intercept(v, MSR_IA32_DS_AREA, VMX_MSR_R);
+
+if ( vpmu_version >= 4 )
+vmx_clear_msr_intercept(v, MSR_CORE_PERF_GLOBAL_STATUS, VMX_MSR_R);
 }
 
 static void core2_vpmu_unset_msr_bitmap(struct vcpu *v)
@@ -250,6 +254,9 @@ static void core2_vpmu_unset_msr_bitmap(struct vcpu *v)
 
 vmx_set_msr_intercept(v, MSR_CORE_PERF_FIXED_CTR_CTRL, VMX_MSR_R);
 vmx_set_msr_intercept(v, MSR_IA32_DS_AREA, VMX_MSR_R);
+
+if ( vpmu_version >= 4 )
+vmx_set_msr_intercept(v, MSR_CORE_PERF_GLOBAL_STATUS, VMX_MSR_R);
 }
 
 static inline void __core2_vpmu_save(struct vcpu *v)
@@ -268,7 +275,7 @@ static inline void __core2_vpmu_save(struct vcpu *v)
 rdmsrl(MSR_P6_EVNTSEL(i), xen_pmu_cntr_pair[i].control);
 }
 
-if ( !is_hvm_vcpu(v) )
+if ( !is_hvm_vcpu(v) || vpmu_version >= 4 )
 rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, core2_vpmu_cxt->global_status);
 /* Save MSR to private context to make it fork-friendly */
 else if ( mem_sharing_enabled(v->domain) )
@@ -338,6 +345,15 @@ static inline void __core2_vpmu_load(struct vcpu *v)
 else if ( mem_sharing_is_fork(v->domain) )
 vmx_write_guest_msr(v, MSR_CORE_PERF_GLOBAL_CTRL,
 core2_vpmu_cxt->global_ctrl);
+
+if ( vpmu_version >= 4 ) {
+const uint64_t global_status = core2_vpmu_cxt->global_status;
+const uint64_t reset = (~global_status) & global_ovf_ctrl_mask ;
+if ( reset )
+wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, reset);
+if ( global_status )
+wrmsrl(MSR_CORE_PERF_GLOBAL_STATUS_SET, global_status);
+}
 }
 
 static int core2_vpmu_verify(struct vcpu *v)
@@ -917,6 +933,7 @@ const struct arch_vpmu_ops *__init core2_vpmu_init(void)
 printk(XENLOG_INFO "VPMU: PMU version %u is not fully supported. "
"Emulating version %d\n", version, VPMU_VERSION_MAX);
 }
+vpmu_version = version;
 
 if ( current_cpu_data.x86 != 6 )
 {
-- 
2.41.0




[RFC PATCH 21/22] x86/AMD: fix CPUID for PerfCtr{4,5}

2023-10-25 Thread Edwin Török
From: Edwin Török 

These are available, but were hidden by CPUID previously.

There are IR (all guests), NB and L2I (dom0 only) performance counters too
that need to be implemented; add placeholder entries for them.

Signed-off-by: Edwin Török 
---
 xen/arch/x86/cpu-policy.c   | 14 +++---
 xen/arch/x86/hvm/svm/svm.c  |  1 +
 xen/arch/x86/pv/emul-priv-op.c  |  1 +
 xen/include/public/arch-x86/cpufeatureset.h |  4 
 4 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c
index e38b648f7d..4242a21e1d 100644
--- a/xen/arch/x86/cpu-policy.c
+++ b/xen/arch/x86/cpu-policy.c
@@ -340,9 +340,16 @@ static void recalculate_misc(struct cpu_policy *p)
 p->extd.raw[0x1e] = EMPTY_LEAF; /* TopoExt APIC ID/Core/Node */
 p->extd.raw[0x1f] = EMPTY_LEAF; /* SEV */
 p->extd.raw[0x20] = EMPTY_LEAF; /* Platform QoS */
-break;
-}
-}
+
+/* These are not implemented yet, hide from CPUID.
+ * When they become implemented, make them available when full vpmu is 
on */
+p->extd.irperf = 0;
+p->extd.perfctrextnb = 0;
+p->extd.perfctrextl2i = 0;
+
+ break;
+ }
+ }
 
 void calculate_raw_cpu_policy(void)
 {
@@ -391,6 +398,7 @@ static void __init calculate_host_policy(void)
 if ( vpmu_mode == XENPMU_MODE_OFF ) {
 p->basic.raw[0xa] = EMPTY_LEAF;
 p->basic.pdcm = 0;
+p->extd.perfctrextcore = 0;
 }
 
 if ( p->extd.svm )
diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index 45f8e1ffd1..ecb6184f51 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -1905,6 +1905,7 @@ static int cf_check svm_msr_read_intercept(
 case MSR_AMD_FAM15H_EVNTSEL3:
 case MSR_AMD_FAM15H_EVNTSEL4:
 case MSR_AMD_FAM15H_EVNTSEL5:
+/* TODO: IRPerfCnt, L2I_* and NB_* support */
 if ( vpmu_do_rdmsr(msr, msr_content) )
 goto gpf;
 break;
diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c
index 105485bb1e..8d802b5df0 100644
--- a/xen/arch/x86/pv/emul-priv-op.c
+++ b/xen/arch/x86/pv/emul-priv-op.c
@@ -1156,6 +1156,7 @@ static int cf_check write_msr(
 if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
 {
 vpmu_msr = true;
+/* fall-through */
 case MSR_AMD_FAM15H_EVNTSEL0 ... MSR_AMD_FAM15H_PERFCTR5:
 case MSR_K7_EVNTSEL0 ... MSR_K7_PERFCTR3:
 if ( vpmu_msr || (boot_cpu_data.x86_vendor &
diff --git a/xen/include/public/arch-x86/cpufeatureset.h 
b/xen/include/public/arch-x86/cpufeatureset.h
index 0aa3251397..5faca0bf7a 100644
--- a/xen/include/public/arch-x86/cpufeatureset.h
+++ b/xen/include/public/arch-x86/cpufeatureset.h
@@ -166,7 +166,10 @@ XEN_CPUFEATURE(FMA4,  3*32+16) /*A  4 operands MAC 
instructions */
 XEN_CPUFEATURE(NODEID_MSR,3*32+19) /*   NodeId MSR */
 XEN_CPUFEATURE(TBM,   3*32+21) /*A  trailing bit manipulations */
 XEN_CPUFEATURE(TOPOEXT,   3*32+22) /*   topology extensions CPUID leafs */
+XEN_CPUFEATURE(PERFCTREXTCORE, 3*32+23) /*A! Extended core performance 
event-select registers */
+XEN_CPUFEATURE(PERFCTREXTNB,  3*32+24) /*   Extended Northbridge performance 
counters */
 XEN_CPUFEATURE(DBEXT, 3*32+26) /*A  data breakpoint extension */
+XEN_CPUFEATURE(PERFCTREXTL2I, 3*32+28) /*   Extended L2 cache performance 
counters */
 XEN_CPUFEATURE(MONITORX,  3*32+29) /*   MONITOR extension 
(MONITORX/MWAITX) */
 
 /* Intel-defined CPU features, CPUID level 0x000D:1.eax, word 4 */
@@ -238,6 +241,7 @@ XEN_CPUFEATURE(EFRO,  7*32+10) /*   APERF/MPERF 
Read Only interface */
 
 /* AMD-defined CPU features, CPUID level 0x8008.ebx, word 8 */
 XEN_CPUFEATURE(CLZERO,8*32+ 0) /*A  CLZERO instruction */
+XEN_CPUFEATURE(IRPERF,8*32+ 1) /* Instruction Retired Performance 
Counter */
 XEN_CPUFEATURE(RSTR_FP_ERR_PTRS, 8*32+ 2) /*A  (F)X{SAVE,RSTOR} always 
saves/restores FPU Error pointers */
 XEN_CPUFEATURE(WBNOINVD,  8*32+ 9) /*   WBNOINVD instruction */
 XEN_CPUFEATURE(IBPB,  8*32+12) /*A  IBPB support only (no IBRS, used 
by AMD) */
-- 
2.41.0




[RFC PATCH 16/22] x86/PMUv4: support LBR_Frz and CTR_Frz

2023-10-25 Thread Edwin Török
From: Edwin Török 

The behaviour is changed from Legacy to Streamlined for the LBR and
PERFMON freeze bits.
See "17.4.7 Freezing LBR and Performance Counters on PMI".

Instead of clearing the freeze bits through DEBUGCTL they are now
cleared through MSR 0x390 like everything else.

Signed-off-by: Edwin Török 
---
 xen/arch/x86/cpu/vpmu_intel.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/xen/arch/x86/cpu/vpmu_intel.c b/xen/arch/x86/cpu/vpmu_intel.c
index 82cd2656ea..923fe42a0b 100644
--- a/xen/arch/x86/cpu/vpmu_intel.c
+++ b/xen/arch/x86/cpu/vpmu_intel.c
@@ -952,6 +952,13 @@ const struct arch_vpmu_ops *__init core2_vpmu_init(void)
  */
 global_ovf_ctrl_mask &= ~(1ULL << 61);
 
+if ( version >= 4)
+/* On PMU version 4 bits 58 and 59 are defined in
+ * IA32_PERF_GLOBAL_STATUS_RESET (same MSR as IA32_PERF_GLOBAL_STATUS).
+ * Also allow clearing overflow for processor trace, even if we don't 
support it yet.
+ * */
+global_ovf_ctrl_mask &= ~((3ULL << 58) | (1ULL << 55));
+
 regs_sz = (sizeof(struct xen_pmu_intel_ctxt) - regs_off) +
   sizeof(uint64_t) * fixed_pmc_cnt +
   sizeof(struct xen_pmu_cntr_pair) * arch_pmc_cnt;
-- 
2.41.0




[RFC PATCH 14/22] x86/msr: RO MSR_TURBO_RATIO_LIMIT{,1,2}, MSR_TEMPERATURE_TARGET

2023-10-25 Thread Edwin Török
From: Edwin Török 

Expose MSR_TURBO_RATIO_LIMIT{,1,2} and MSR_TEMPERATURE_TARGET to guest as RO.
Although these are not architectural MSRs they are in the same place
currently on all supported CPUs.
They also have the same meaning, except for 06_55H and 06_5C where
they have a different meaning (turbo core count).

It is safe to expose this to the guest by default: they are only
statically defined limits and don't expose runtime measurements.

It has been observed that some drivers BSOD on an unguarded read on
MSR 1ADH (e.g. socwatch).

Also we read as zero the actual temperature, so reporting the temp
target as 0 might lead to 0/0.

Backport: 4.15+

Signed-off-by: Edwin Török 
---
 xen/arch/x86/hvm/vmx/vmx.c | 9 +
 xen/arch/x86/pv/emul-priv-op.c | 4 
 2 files changed, 13 insertions(+)

diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index fefd01be40..cd772585fe 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -3353,6 +3353,15 @@ static int cf_check vmx_msr_read_intercept(
 if ( !nvmx_msr_read_intercept(msr, msr_content) )
 goto gp_fault;
 break;
+
+case MSR_TEMPERATURE_TARGET:
+case MSR_TURBO_RATIO_LIMIT...MSR_TURBO_RATIO_LIMIT2:
+if ( !rdmsr_safe(msr, *msr_content) )
+break;
+/* RO for guests, MSR_PLATFORM_INFO bits set accordingly in msr.c to 
indicate lack of write
+ * support. */
+goto gp_fault;
+
 case MSR_IA32_MISC_ENABLE:
 rdmsrl(MSR_IA32_MISC_ENABLE, *msr_content);
 /* Debug Trace Store is not supported. */
diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c
index e623e57b55..09bfde1060 100644
--- a/xen/arch/x86/pv/emul-priv-op.c
+++ b/xen/arch/x86/pv/emul-priv-op.c
@@ -960,6 +960,10 @@ static int cf_check read_msr(
 *val = guest_misc_enable(*val);
 return X86EMUL_OKAY;
 
+case MSR_TEMPERATURE_TARGET:
+case MSR_TURBO_RATIO_LIMIT...MSR_TURBO_RATIO_LIMIT2:
+goto normal;
+
 case MSR_P6_PERFCTR(0) ... MSR_P6_PERFCTR_LAST:
 case MSR_P6_EVNTSEL(0) ... MSR_P6_EVNTSEL_LAST:
 case MSR_CORE_PERF_FIXED_CTR0 ... MSR_CORE_PERF_FIXED_CTRn:
-- 
2.41.0




[RFC PATCH 15/22] x86/VPMU: use macros for max supported VPMU version

2023-10-25 Thread Edwin Török
From: Edwin Török 

This ensures consistency between the 2 pieces of code that check for
VPMU version.

No functional change.

Signed-off-by: Edwin Török 
---
 xen/arch/x86/cpu/vpmu_intel.c   | 20 ++--
 xen/arch/x86/include/asm/vpmu.h |  1 +
 2 files changed, 7 insertions(+), 14 deletions(-)

diff --git a/xen/arch/x86/cpu/vpmu_intel.c b/xen/arch/x86/cpu/vpmu_intel.c
index 4c0776cee7..82cd2656ea 100644
--- a/xen/arch/x86/cpu/vpmu_intel.c
+++ b/xen/arch/x86/cpu/vpmu_intel.c
@@ -810,7 +810,7 @@ static int cf_check core2_vpmu_initialise(struct vcpu *v)
 static bool_t ds_warned;
 
 if ( v->domain->arch.cpuid->basic.pmu_version <= 1 ||
- v->domain->arch.cpuid->basic.pmu_version >= 6 )
+ v->domain->arch.cpuid->basic.pmu_version >= 
VPMU_VERSION_MAX_SUPPORTED )
 return -EINVAL;
 
 if ( (arch_pmc_cnt + fixed_pmc_cnt) == 0 )
@@ -890,22 +890,14 @@ const struct arch_vpmu_ops *__init core2_vpmu_init(void)
 if ( current_cpu_data.cpuid_level >= 0xa )
 version = MASK_EXTR(cpuid_eax(0xa), PMU_VERSION_MASK);
 
-switch ( version )
-{
-case 4:
-case 5:
-printk(XENLOG_INFO "VPMU: PMU version %u is not fully supported. "
-   "Emulating version 3\n", version);
-/* FALLTHROUGH */
-
-case 2:
-case 3:
-break;
-
-default:
+if ( version <= 1 ||
+ version > VPMU_VERSION_MAX_SUPPORTED ) {
 printk(XENLOG_WARNING "VPMU: PMU version %u is not supported\n",
version);
 return ERR_PTR(-EINVAL);
+} else if ( version > VPMU_VERSION_MAX ) {
+printk(XENLOG_INFO "VPMU: PMU version %u is not fully supported. "
+   "Emulating version %d\n", version, VPMU_VERSION_MAX);
 }
 
 if ( current_cpu_data.x86 != 6 )
diff --git a/xen/arch/x86/include/asm/vpmu.h b/xen/arch/x86/include/asm/vpmu.h
index 49c3e8c19a..79f7f4a09e 100644
--- a/xen/arch/x86/include/asm/vpmu.h
+++ b/xen/arch/x86/include/asm/vpmu.h
@@ -75,6 +75,7 @@ struct vpmu_struct {
 #define VPMU_CPU_HAS_BTS0x2000 /* Has Branch Trace Store */
 
 #define VPMU_VERSION_MAX0x3
+#define VPMU_VERSION_MAX_SUPPORTED  0x5
 
 static inline void vpmu_set(struct vpmu_struct *vpmu, const u32 mask)
 {
-- 
2.41.0




[RFC PATCH 11/22] x86/PMUv2: freeze support in IA32_DEBUGCTL

2023-10-25 Thread Edwin Török
From: Edwin Török 

This is not yet exposed by HVM policies, but PMU version 2 requires that
if PDCM is supported in CPUID then these 2 bits must work.

Signed-off-by: Edwin Török 
---
 xen/arch/x86/hvm/vmx/vmx.c   | 4 
 xen/arch/x86/include/asm/msr-index.h | 4 +++-
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 1510e980dd..f1f8a9afa2 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -3601,6 +3601,10 @@ static int cf_check vmx_msr_write_intercept(
   IA32_DEBUGCTLMSR_BTS_OFF_USR);
 }
 
+if (cp->basic.pmu_version >= 2 && cpu_has(_cpu_data, 
X86_FEATURE_PDCM)) {
+rsvd &= ~(IA32_DEBUGCTLMSR_FREEZE_LBRS_ON_PMI | 
IA32_DEBUGCTLMSR_FREEZE_PERFMON_ON_PMI);
+}
+
 if ( cp->feat.rtm )
 rsvd &= ~IA32_DEBUGCTLMSR_RTM;
 
diff --git a/xen/arch/x86/include/asm/msr-index.h 
b/xen/arch/x86/include/asm/msr-index.h
index 8a881a8a6f..0dfb5b499f 100644
--- a/xen/arch/x86/include/asm/msr-index.h
+++ b/xen/arch/x86/include/asm/msr-index.h
@@ -305,7 +305,9 @@
 #define IA32_DEBUGCTLMSR_BTINT (1<<8) /* Branch Trace Interrupt */
 #define IA32_DEBUGCTLMSR_BTS_OFF_OS(1<<9)  /* BTS off if CPL 0 */
 #define IA32_DEBUGCTLMSR_BTS_OFF_USR   (1<<10) /* BTS off if CPL > 0 */
-#define IA32_DEBUGCTLMSR_RTM   (1<<15) /* RTM debugging enable */
+#define IA32_DEBUGCTLMSR_FREEZE_LBRS_ON_PMI(1<<11) /* LBR stack frozen on 
PMI */
+#define IA32_DEBUGCTLMSR_FREEZE_PERFMON_ON_PMI (1<<12) /*  Global counter 
control ENABLE bit frozen on PMI */
+#define IA32_DEBUGCTLMSR_RTM   (1<<15) /* RTM debugging enable 
*/
 
 #define MSR_IA32_LASTBRANCHFROMIP  0x01db
 #define MSR_IA32_LASTBRANCHTOIP0x01dc
-- 
2.41.0




[RFC PATCH 02/22] x86/msr: implement MSR_SMI_COUNT for Dom0 on Intel

2023-10-25 Thread Edwin Török
From: Edwin Török 

Dom0 should always be able to read this MSR: it is useful when
investigating performance issues in production.
Although the count is Thread scoped, in practice all cores were observed
to return the same count (perhaps due to implementation details of SMM),
so do not require the cpu to be pinned in order to read it.

This MSR exists on Intel since Nehalem.

Backport: 4.15+

Signed-off-by: Edwin Török 
---
 xen/arch/x86/include/asm/msr-index.h | 3 +++
 xen/arch/x86/msr.c   | 7 +++
 2 files changed, 10 insertions(+)

diff --git a/xen/arch/x86/include/asm/msr-index.h 
b/xen/arch/x86/include/asm/msr-index.h
index 82a81bd0a2..2853a276ca 100644
--- a/xen/arch/x86/include/asm/msr-index.h
+++ b/xen/arch/x86/include/asm/msr-index.h
@@ -641,6 +641,9 @@
 #define MSR_NHL_LBR_SELECT 0x01c8
 #define MSR_NHL_LASTBRANCH_TOS 0x01c9
 
+/* Nehalem and newer other MSRs */
+#define MSR_SMI_COUNT   0x0034
+
 /* Skylake (and newer) last-branch recording */
 #define MSR_SKL_LASTBRANCH_0_FROM_IP   0x0680
 #define MSR_SKL_LASTBRANCH_0_TO_IP 0x06c0
diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c
index c33dc78cd8..0bf6d263e7 100644
--- a/xen/arch/x86/msr.c
+++ b/xen/arch/x86/msr.c
@@ -139,6 +139,13 @@ int guest_rdmsr(struct vcpu *v, uint32_t msr, uint64_t 
*val)
 *val = msrs->misc_features_enables.raw;
 break;
 
+case MSR_SMI_COUNT:
+if ( cp->x86_vendor != X86_VENDOR_INTEL )
+goto gp_fault;
+if ( is_hardware_domain(d) && !rdmsr_safe(msr, *val) )
+break;
+return X86EMUL_UNHANDLEABLE;
+
 case MSR_P5_MC_ADDR:
 case MSR_P5_MC_TYPE:
 case MSR_IA32_MCG_CAP ... MSR_IA32_MCG_CTL:  /* 0x179 -> 0x17b */
-- 
2.41.0




[RFC PATCH 19/22] x86/PMUv4: bump max PMU version to 4

2023-10-25 Thread Edwin Török
From: Edwin Török 

Depends on the other x86/PMUv4 patches:
"x86/PMUv4: disable intercept for PERF_GLOBAL_STATUS"
"x86/PMUv4: IA32_PERF_GLOBAL_{STATUS_SET, INUSE} support"
"x86/PMUv4: support LBR_Frz and CTR_Frz"

Signed-off-by: Edwin Török 
---
 xen/arch/x86/include/asm/vpmu.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xen/arch/x86/include/asm/vpmu.h b/xen/arch/x86/include/asm/vpmu.h
index 79f7f4a09e..eaededadb5 100644
--- a/xen/arch/x86/include/asm/vpmu.h
+++ b/xen/arch/x86/include/asm/vpmu.h
@@ -74,7 +74,7 @@ struct vpmu_struct {
 #define VPMU_CPU_HAS_DS 0x1000 /* Has Debug Store */
 #define VPMU_CPU_HAS_BTS0x2000 /* Has Branch Trace Store */
 
-#define VPMU_VERSION_MAX0x3
+#define VPMU_VERSION_MAX0x4
 #define VPMU_VERSION_MAX_SUPPORTED  0x5
 
 static inline void vpmu_set(struct vpmu_struct *vpmu, const u32 mask)
-- 
2.41.0




[RFC PATCH 12/22] x86/msr-index: define more architectural MSRs

2023-10-25 Thread Edwin Török
From: Edwin Török 

Add most architectural MSRs, except those behind CPUID features that are
not yet implemented, such as TME, SGX.

Based on "2.1 Architectural MSRs" of Intel SDM volume 4

Signed-off-by: Edwin Török 
---
 xen/arch/x86/include/asm/msr-index.h | 54 +---
 1 file changed, 50 insertions(+), 4 deletions(-)

diff --git a/xen/arch/x86/include/asm/msr-index.h 
b/xen/arch/x86/include/asm/msr-index.h
index 0dfb5b499f..061b07c7ae 100644
--- a/xen/arch/x86/include/asm/msr-index.h
+++ b/xen/arch/x86/include/asm/msr-index.h
@@ -58,6 +58,14 @@
 #define  PRED_CMD_IBPB  (_AC(1, ULL) <<  0)
 #define  PRED_CMD_SBPB  (_AC(1, ULL) <<  7)
 
+#define MSR_IA32_SMM_MONITOR_CTL0x009b
+#define MSR_IA32_SMBASE 0x009e
+#define MSR_IA32_SMRR_PHYSBASE  0x01f2
+#define MSR_IA32_SMRR_PHYSMASK  0x01f3
+#define MSR_IA32_PLATFORM_DCA_CAP   0x01f8
+#define MSR_IA32_CPU_DCA_CAP0x01f9
+#define MSR_IA32_DCA_0_CAP  0x01fa
+
 #define MSR_PPIN_CTL0x004e
 #define  PPIN_LOCKOUT   (_AC(1, ULL) <<  0)
 #define  PPIN_ENABLE(_AC(1, ULL) <<  1)
@@ -267,13 +275,21 @@
 #define MSR_IA32_MCG_CAP   0x0179
 #define MSR_IA32_MCG_STATUS0x017a
 #define MSR_IA32_MCG_CTL   0x017b
-#define MSR_IA32_MCG_EXT_CTL   0x04d0
+#define MSR_IA32_MCG_EXT_CTL   0x04d0
 
 #define MSR_IA32_PEBS_ENABLE   0x03f1
 #define MSR_IA32_DS_AREA   0x0600
 #define MSR_IA32_PERF_CAPABILITIES 0x0345
 /* Lower 6 bits define the format of the address in the LBR stack */
-#define MSR_IA32_PERF_CAP_LBR_FORMAT   0x3f
+#define MSR_IA32_PERF_CAP_LBR_FORMAT   0x3f
+#define MSR_IA32_PERF_CAP_PEBS_TRAP(_AC(1,ULL) << 6)
+#define MSR_IA32_PERF_CAP_PEBS_SAVE_ARCH_REGS  (_AC(1,ULL) << 7)
+#define MSR_IA32_PERF_CAP_PEBS_RECORD_FORMAT   0xf00
+#define MSR_IA32_PERF_CAP_FREEZE_WHILE_SMM (_AC(1,ULL) << 12)
+#define MSR_IA32_PERF_CAP_FULLWIDTH_PMC(_AC(1,ULL) << 13)
+#define MSR_IA32_PERF_CAP_PEBS_BASELINE(_AC(1,ULL) << 14)
+#define MSR_IA32_PERF_CAP_PERF_METRICS (_AC(1,ULL) << 15)
+#define MSR_IA32_PERF_CAP_PEBS_TO_PT   (_AC(1,ULL) << 16)
 
 #define MSR_IA32_BNDCFGS   0x0d90
 #define IA32_BNDCFGS_ENABLE0x0001
@@ -307,6 +323,8 @@
 #define IA32_DEBUGCTLMSR_BTS_OFF_USR   (1<<10) /* BTS off if CPL > 0 */
 #define IA32_DEBUGCTLMSR_FREEZE_LBRS_ON_PMI(1<<11) /* LBR stack frozen on 
PMI */
 #define IA32_DEBUGCTLMSR_FREEZE_PERFMON_ON_PMI (1<<12) /*  Global counter 
control ENABLE bit frozen on PMI */
+#define IA32_DEBUGCTLMSR_ENABLE_UNCORE_PMI (1<<13) /* Enable uncore PMI */
+#define IA32_DEBUGCTLMSR_FREEZE_WHILE_SMM  (1<<14) /* Freeze perfmon/trace 
while in SMM */
 #define IA32_DEBUGCTLMSR_RTM   (1<<15) /* RTM debugging enable 
*/
 
 #define MSR_IA32_LASTBRANCHFROMIP  0x01db
@@ -469,6 +487,7 @@
 #define MSR_VIA_RNG0x110b
 
 /* Intel defined MSRs. */
+#define MSR_IA32_MONITOR_FILTER_SIZE   0x0006
 #define MSR_IA32_TSC   0x0010
 #define MSR_IA32_PLATFORM_ID   0x0017
 #define MSR_IA32_EBL_CR_POWERON0x002a
@@ -491,6 +510,7 @@
 #define MSR_IA32_PERF_STATUS   0x0198
 #define MSR_IA32_PERF_CTL  0x0199
 
+#define MSR_IA32_UMWAIT_CONTROL0x00e1
 #define MSR_IA32_MPERF 0x00e7
 #define MSR_IA32_APERF 0x00e8
 
@@ -498,6 +518,7 @@
 #define MSR_IA32_THERM_INTERRUPT   0x019b
 #define MSR_IA32_THERM_STATUS  0x019c
 #define MSR_IA32_MISC_ENABLE   0x01a0
+#define MSR_IA32_MISC_ENABLE_FAST_STRINGS (1<<0)
 #define MSR_IA32_MISC_ENABLE_PERF_AVAIL   (1<<7)
 #define MSR_IA32_MISC_ENABLE_BTS_UNAVAIL  (1<<11)
 #define MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL (1<<12)
@@ -508,15 +529,38 @@
 #define MSR_IA32_MISC_ENABLE_TURBO_DISENGAGE (_AC(1, ULL) << 38)
 
 #define MSR_IA32_TSC_DEADLINE  0x06E0
+
+#define MSR_IA32_PM_ENABLE 0x0770
+#define MSR_IA32_HWP_CAPABILITIES  0x0771
+#define MSR_IA32_HWP_REQUEST_PKG   0x0772
+#define MSR_IA32_HWP_INTERRUPT 0x0773
+#define MSR_IA32_HWP_REQUEST   0x0774
+#define MSR_IA32_PECI_HWP_REQUEST_INFO 0x0775
+#define MSR_IA32_HWP_STATUS0x0777
+
+#define MSR_IA32_PKG_HDC_CTL   0x0db0
+#define MSR_IA32_PM_CTL1   0x0db1
+#define MSR_IA32_THREAD_STALL  0x0db2
+#define MSR_IA32_HW_FEEDBACK_PTR   0x17d0
+#define MSR_IA32_HW_FEEDBACK_CONFIG0

[RFC PATCH 05/22] x86/PMUv1: report correct information in 0xa CPUID

2023-10-25 Thread Edwin Török
From: Edwin Török 

The 0xa CPUID leaf has to report supported number of:
- fixed performance counters
- general purpose performance counters
- architectural predefined events

And the PMU version (which was already limited to 3).

Type punning is used, which should be safe due to -fno-strict-aliasing.

This limits the number of arch events supported when vpmu=arch on Icelake.

Backport: 4.0+

Signed-off-by: Edwin Török 
---
 xen/arch/x86/cpu/vpmu_intel.c   |  2 +-
 xen/arch/x86/cpuid.c| 39 ++---
 xen/arch/x86/include/asm/vpmu.h |  4 
 3 files changed, 41 insertions(+), 4 deletions(-)

diff --git a/xen/arch/x86/cpu/vpmu_intel.c b/xen/arch/x86/cpu/vpmu_intel.c
index fa5b40c65c..9602728f1b 100644
--- a/xen/arch/x86/cpu/vpmu_intel.c
+++ b/xen/arch/x86/cpu/vpmu_intel.c
@@ -66,7 +66,7 @@ static bool_t __read_mostly full_width_write;
 #define ARCH_CNTR_PIN_CONTROL (1ULL << 19)
 
 /* Number of general-purpose and fixed performance counters */
-static unsigned int __read_mostly arch_pmc_cnt, fixed_pmc_cnt;
+unsigned int __read_mostly arch_pmc_cnt, fixed_pmc_cnt;
 
 /* Masks used for testing whether and MSR is valid */
 #define ARCH_CTRL_MASK  (~((1ull << 32) - 1) | (1ull << 21) | 
ARCH_CNTR_PIN_CONTROL)
diff --git a/xen/arch/x86/cpuid.c b/xen/arch/x86/cpuid.c
index 455a09b2dd..dfbcd1b3a4 100644
--- a/xen/arch/x86/cpuid.c
+++ b/xen/arch/x86/cpuid.c
@@ -304,9 +304,42 @@ void guest_cpuid(const struct vcpu *v, uint32_t leaf,
 *res = EMPTY_LEAF;
 else
 {
-/* Report at most v3 since that's all we currently emulate. */
-if ( (res->a & 0xff) > 3 )
-res->a = (res->a & ~0xff) | 3;
+union {
+uint32_t eax;
+struct {
+uint8_t version;
+uint8_t general_nr;
+uint8_t general_width;
+uint8_t arch_nr;
+};
+} u;
+u.eax = res->a;
+
+/* Report at most VPMU_VERSION_MAX since that's all we currently 
emulate. */
+if ( u.version >  VPMU_VERSION_MAX ) {
+gdprintk(XENLOG_WARNING, "Limiting PMU version to %d (actual 
%d)", VPMU_VERSION_MAX, u.version);
+u.version = VPMU_VERSION_MAX;
+}
+
+if ( u.general_nr > arch_pmc_cnt ) {
+gdprintk(XENLOG_WARNING, "Limiting general purpose PMU count 
to %d (actual %d)", arch_pmc_cnt, u.general_nr);
+u.general_nr = arch_pmc_cnt;
+}
+
+if ( vpmu_features & (XENPMU_FEATURE_IPC_ONLY |
+  XENPMU_FEATURE_ARCH_ONLY) ) {
+unsigned limit = ( vpmu_features & XENPMU_FEATURE_ARCH_ONLY ) 
? 7 : 3;
+if (limit < u.arch_nr) {
+gdprintk(XENLOG_WARNING, "Limiting architectural PMU 
events to %d (actual %d)", limit, u.arch_nr);
+u.arch_nr = limit;
+}
+}
+
+res->a = u.eax;
+
+/* We only implement 3 fixed function counters */
+if ( (res->d & 0x1f) > fixed_pmc_cnt )
+res->d = (res->d & ~0x1f) | fixed_pmc_cnt;
 }
 break;
 
diff --git a/xen/arch/x86/include/asm/vpmu.h b/xen/arch/x86/include/asm/vpmu.h
index b165acc6c2..1ef6089ccb 100644
--- a/xen/arch/x86/include/asm/vpmu.h
+++ b/xen/arch/x86/include/asm/vpmu.h
@@ -74,6 +74,8 @@ struct vpmu_struct {
 #define VPMU_CPU_HAS_DS 0x1000 /* Has Debug Store */
 #define VPMU_CPU_HAS_BTS0x2000 /* Has Branch Trace Store */
 
+#define VPMU_VERSION_MAX0x3
+
 static inline void vpmu_set(struct vpmu_struct *vpmu, const u32 mask)
 {
 vpmu->flags |= mask;
@@ -118,6 +120,8 @@ static inline int vpmu_do_rdmsr(unsigned int msr, uint64_t 
*msr_content)
 
 extern unsigned int vpmu_mode;
 extern unsigned int vpmu_features;
+extern unsigned int arch_pmc_cnt;
+extern unsigned int fixed_pmc_cnt;
 
 /* Context switch */
 static inline void vpmu_switch_from(struct vcpu *prev)
-- 
2.41.0




[RFC PATCH 01/22] x86/msr: MSR_PLATFORM_INFO shouldn't claim that turbo is programmable

2023-10-25 Thread Edwin Török
From: Edwin Török 

Xen forbids writes to the various turbo control MSRs, however MSR_PLATFORM_INFO 
claims that these MSRs are writable.
Override MSR_PLATFORM_INFO bits to indicate lack of support.

See Intel SDM Volume 4, 2.17.6 "MSRs Introduced in the Intel Xeon Scalable 
Processor Family",
which describes that MSR_PLATFORM_INFO.[28] = 1 implies that 
MSR_TURBO_RATIO_LIMIT is R/W,
and similarly bit 29 for TDP control, and bit 30 for MSR_TEMPERATURE_TARGET.

These bits were not all present on earlier processors; however, where missing 
the bits were reserved,
and when present they always occupy the same bit positions.

(Curiously, bit 31 that Xen uses is not documented anywhere in this manual but in a 
separate one).

Backport: 4.0+

Signed-off-by: Edwin Török 
---
 xen/arch/x86/cpu-policy.c| 8 
 xen/include/xen/lib/x86/cpu-policy.h | 5 -
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c
index 81e574390f..64c8857a61 100644
--- a/xen/arch/x86/cpu-policy.c
+++ b/xen/arch/x86/cpu-policy.c
@@ -407,6 +407,14 @@ static void __init calculate_host_policy(void)
 /* 0x00ce  MSR_INTEL_PLATFORM_INFO */
 /* probe_cpuid_faulting() sanity checks presence of MISC_FEATURES_ENABLES 
*/
 p->platform_info.cpuid_faulting = cpu_has_cpuid_faulting;
+
+/* Xen denies write access to turbo control MSRs, however natively the CPU 
may support them
+   and advertise those MSRs as writable by having bits 28 to 30 set to 1 
in MSR_PLATFORM_INFO.
+   Set these bits to 0 to avoid confusing guests on the availability of 
turbo controls.
+*/
+p->platform_info.programmable_ratio_turbo = 0;
+p->platform_info.programmable_tdp_turbo = 0;
+p->platform_info.programmable_tj_offset = 0;
 }
 
 static void __init guest_common_max_feature_adjustments(uint32_t *fs)
diff --git a/xen/include/xen/lib/x86/cpu-policy.h 
b/xen/include/xen/lib/x86/cpu-policy.h
index bab3eecda6..70479689f2 100644
--- a/xen/include/xen/lib/x86/cpu-policy.h
+++ b/xen/include/xen/lib/x86/cpu-policy.h
@@ -339,7 +339,10 @@ struct cpu_policy
 union {
 uint32_t raw;
 struct {
-uint32_t :31;
+uint32_t :28;
+bool programmable_ratio_turbo:1;
+bool programmable_tdp_turbo:1;
+bool programmable_tj_offset:1;
 bool cpuid_faulting:1;
 };
 } platform_info;
-- 
2.41.0




[RFC PATCH 07/22] x86/PMUv1: allow topdown slots arch perf event

2023-10-25 Thread Edwin Török
From: Edwin Török 

This is part of 'Architectural Performance Monitoring Version 1'
and implemented on Icelake.

Backport: 4.13+

Signed-off-by: Edwin Török 
---
 xen/arch/x86/cpu/vpmu_intel.c | 1 +
 xen/arch/x86/cpuid.c  | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/xen/arch/x86/cpu/vpmu_intel.c b/xen/arch/x86/cpu/vpmu_intel.c
index ec9ab01fde..44a1ed5b10 100644
--- a/xen/arch/x86/cpu/vpmu_intel.c
+++ b/xen/arch/x86/cpu/vpmu_intel.c
@@ -645,6 +645,7 @@ static int cf_check core2_vpmu_do_wrmsr(unsigned int msr, 
uint64_t msr_content)
 case 0x412e:   /* Last Level Cache Misses */
 case 0x00c4:   /* Branch Instructions Retired */
 case 0x00c5:   /* All Branch Mispredict Retired */
+case 0x01a4:   /* Topdown Slots */
 blocked = 0;
 break;
}
diff --git a/xen/arch/x86/cpuid.c b/xen/arch/x86/cpuid.c
index dfbcd1b3a4..51ee89afc4 100644
--- a/xen/arch/x86/cpuid.c
+++ b/xen/arch/x86/cpuid.c
@@ -328,7 +328,7 @@ void guest_cpuid(const struct vcpu *v, uint32_t leaf,
 
 if ( vpmu_features & (XENPMU_FEATURE_IPC_ONLY |
   XENPMU_FEATURE_ARCH_ONLY) ) {
-unsigned limit = ( vpmu_features & XENPMU_FEATURE_ARCH_ONLY ) 
? 7 : 3;
+unsigned limit = ( vpmu_features & XENPMU_FEATURE_ARCH_ONLY ) 
? 8 : 3;
 if (limit < u.arch_nr) {
 gdprintk(XENLOG_WARNING, "Limiting architectural PMU 
events to %d (actual %d)", limit, u.arch_nr);
 u.arch_nr = limit;
-- 
2.41.0




[RFC PATCH 09/22] x86/PMUv1: consistently use 8 perf counters in Dom0

2023-10-25 Thread Edwin Török
From: Edwin Török 

The code is currently inconsistent: it supports 4 on read and 8 on write.
Sandy Bridge+ supports 8 of these, and the MSR range is architecturally
reserved, so always support 8.

Make it a macro to ensure we use the same value everywhere.

Although DomUs are now restricted to only 4 PMCs, we may still want to
use all 8 in Dom0 when available, and given the default rdmsr/wrmsr
restrictions Dom0 would be prevented from reading these MSRs as well.

Depends on:
"x86/PMUv1: limit arch PMCs to 4 for non-Dom0"

Backport: 4.15+

Signed-off-by: Edwin Török 
---
 xen/arch/x86/hvm/vmx/vmx.c   | 8 
 xen/arch/x86/include/asm/msr-index.h | 3 +++
 xen/arch/x86/pv/emul-priv-op.c   | 8 
 3 files changed, 11 insertions(+), 8 deletions(-)

diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index f6e5123f66..7d51addf7a 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -3360,8 +3360,8 @@ static int cf_check vmx_msr_read_intercept(
MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL;
 /* Perhaps vpmu will change some bits. */
 /* FALLTHROUGH */
-case MSR_P6_PERFCTR(0)...MSR_P6_PERFCTR(7):
-case MSR_P6_EVNTSEL(0)...MSR_P6_EVNTSEL(3):
+case MSR_P6_PERFCTR(0)...MSR_P6_PERFCTR_LAST:
+case MSR_P6_EVNTSEL(0)...MSR_P6_EVNTSEL_LAST:
 case MSR_CORE_PERF_FIXED_CTR0...MSR_CORE_PERF_FIXED_CTR2:
 case MSR_CORE_PERF_FIXED_CTR_CTRL...MSR_CORE_PERF_GLOBAL_OVF_CTRL:
 case MSR_IA32_PEBS_ENABLE:
@@ -3678,8 +3678,8 @@ static int cf_check vmx_msr_write_intercept(
 goto gp_fault;
 break;
 
-case MSR_P6_PERFCTR(0)...MSR_P6_PERFCTR(7):
-case MSR_P6_EVNTSEL(0)...MSR_P6_EVNTSEL(7):
+case MSR_P6_PERFCTR(0)...MSR_P6_PERFCTR_LAST:
+case MSR_P6_EVNTSEL(0)...MSR_P6_EVNTSEL_LAST:
 case MSR_CORE_PERF_FIXED_CTR0...MSR_CORE_PERF_FIXED_CTR2:
 case MSR_CORE_PERF_FIXED_CTR_CTRL...MSR_CORE_PERF_GLOBAL_OVF_CTRL:
 case MSR_IA32_PEBS_ENABLE:
diff --git a/xen/arch/x86/include/asm/msr-index.h 
b/xen/arch/x86/include/asm/msr-index.h
index 8601f8f426..011a926e0e 100644
--- a/xen/arch/x86/include/asm/msr-index.h
+++ b/xen/arch/x86/include/asm/msr-index.h
@@ -521,8 +521,11 @@
 #define MSR_IA32_PSR_MBA_MASK(n)   (0x0d50 + (n))
 
 /* Intel Model 6 */
+#define MSR_P6_PERFCTR_MAX 8
 #define MSR_P6_PERFCTR(n)  (0x00c1 + (n))
 #define MSR_P6_EVNTSEL(n)  (0x0186 + (n))
+#define MSR_P6_PERFCTR_LASTMSR_P6_PERFCTR(MSR_P6_PERFCTR_MAX-1)
+#define MSR_P6_EVNTSEL_LASTMSR_P6_EVNTSEL(MSR_P6_PERFCTR_MAX-1)
 
 /* P4/Xeon+ specific */
 #define MSR_IA32_MCG_EAX   0x0180
diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c
index 978ae679a2..301a70f5ea 100644
--- a/xen/arch/x86/pv/emul-priv-op.c
+++ b/xen/arch/x86/pv/emul-priv-op.c
@@ -965,8 +965,8 @@ static int cf_check read_msr(
 *val = 0;
 return X86EMUL_OKAY;
 
-case MSR_P6_PERFCTR(0) ... MSR_P6_PERFCTR(7):
-case MSR_P6_EVNTSEL(0) ... MSR_P6_EVNTSEL(3):
+case MSR_P6_PERFCTR(0) ... MSR_P6_PERFCTR_LAST:
+case MSR_P6_EVNTSEL(0) ... MSR_P6_EVNTSEL_LAST:
 case MSR_CORE_PERF_FIXED_CTR0 ... MSR_CORE_PERF_FIXED_CTR2:
 case MSR_CORE_PERF_FIXED_CTR_CTRL ... MSR_CORE_PERF_GLOBAL_OVF_CTRL:
 if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
@@ -1145,8 +1145,8 @@ static int cf_check write_msr(
 return X86EMUL_OKAY;
 break;
 
-case MSR_P6_PERFCTR(0) ... MSR_P6_PERFCTR(7):
-case MSR_P6_EVNTSEL(0) ... MSR_P6_EVNTSEL(3):
+case MSR_P6_PERFCTR(0) ... MSR_P6_PERFCTR_LAST:
+case MSR_P6_EVNTSEL(0) ... MSR_P6_EVNTSEL_LAST:
 case MSR_CORE_PERF_FIXED_CTR0 ... MSR_CORE_PERF_FIXED_CTR2:
 case MSR_CORE_PERF_FIXED_CTR_CTRL ... MSR_CORE_PERF_GLOBAL_OVF_CTRL:
 if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
-- 
2.41.0




[RFC PATCH 13/22] x86/vpmu: expose PDCM and IA32_PERF_CAPABILITIES when vpmu is enabled

2023-10-25 Thread Edwin Török
From: Edwin Török 

Marked as exposed by default, but then hidden if vpmu is not available.
TODO: the interaction between vpmu and policy might need some changes.

Only expose LBR and the full-width MSR capabilities, and not PEBS.

Backport: 4.15+

Signed-off-by: Edwin Török 
---
 xen/arch/x86/cpu-policy.c   | 10 --
 xen/arch/x86/hvm/vmx/vmx.c  |  2 +-
 xen/arch/x86/msr.c  |  8 
 xen/arch/x86/pv/emul-priv-op.c  |  5 -
 xen/include/public/arch-x86/cpufeatureset.h |  2 +-
 5 files changed, 18 insertions(+), 9 deletions(-)

diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c
index 64c8857a61..e38b648f7d 100644
--- a/xen/arch/x86/cpu-policy.c
+++ b/xen/arch/x86/cpu-policy.c
@@ -388,8 +388,10 @@ static void __init calculate_host_policy(void)
 recalculate_misc(p);
 
 /* When vPMU is disabled, drop it from the host policy. */
-if ( vpmu_mode == XENPMU_MODE_OFF )
+if ( vpmu_mode == XENPMU_MODE_OFF ) {
 p->basic.raw[0xa] = EMPTY_LEAF;
+p->basic.pdcm = 0;
+}
 
 if ( p->extd.svm )
 {
@@ -899,8 +901,12 @@ void recalculate_cpuid_policy(struct domain *d)
 }
 
 if ( vpmu_mode == XENPMU_MODE_OFF ||
- ((vpmu_mode & XENPMU_MODE_ALL) && !is_hardware_domain(d)) )
+ ((vpmu_mode & XENPMU_MODE_ALL) && !is_hardware_domain(d)) ) {
 p->basic.raw[0xa] = EMPTY_LEAF;
+p->basic.pdcm = 0;
+}
+if ( vpmu_features & (XENPMU_FEATURE_IPC_ONLY | XENPMU_FEATURE_ARCH_ONLY) )
+p->basic.pdcm = 0;
 
 if ( !p->extd.svm )
 p->extd.raw[0xa] = EMPTY_LEAF;
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index f1f8a9afa2..fefd01be40 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -3602,7 +3602,7 @@ static int cf_check vmx_msr_write_intercept(
 }
 
 if (cp->basic.pmu_version >= 2 && cpu_has(_cpu_data, 
X86_FEATURE_PDCM)) {
-rsvd &= ~(IA32_DEBUGCTLMSR_FREEZE_LBRS_ON_PMI | 
IA32_DEBUGCTLMSR_FREEZE_PERFMON_ON_PMI);
+rsvd &= ~(IA32_DEBUGCTLMSR_FREEZE_LBRS_ON_PMI | 
IA32_DEBUGCTLMSR_FREEZE_PERFMON_ON_PMI | IA32_DEBUGCTLMSR_FREEZE_WHILE_SMM);
 }
 
 if ( cp->feat.rtm )
diff --git a/xen/arch/x86/msr.c b/xen/arch/x86/msr.c
index 0bf6d263e7..483b5e4f70 100644
--- a/xen/arch/x86/msr.c
+++ b/xen/arch/x86/msr.c
@@ -186,6 +186,14 @@ int guest_rdmsr(struct vcpu *v, uint32_t msr, uint64_t 
*val)
 goto gp_fault;
 goto get_reg;
 
+case MSR_IA32_PERF_CAPABILITIES:
+if ( cp->x86_vendor != X86_VENDOR_INTEL )
+goto gp_fault;
+if ( !cp->basic.pdcm || rdmsr_safe(msr, *val) )
+goto gp_fault;
+*val &= (MSR_IA32_PERF_CAP_LBR_FORMAT | 
MSR_IA32_PERF_CAP_FREEZE_WHILE_SMM | MSR_IA32_PERF_CAP_FULLWIDTH_PMC);
+break;
+
 case MSR_X2APIC_FIRST ... MSR_X2APIC_LAST:
 if ( !is_hvm_domain(d) || v != curr )
 goto gp_fault;
diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c
index a8472fc779..e623e57b55 100644
--- a/xen/arch/x86/pv/emul-priv-op.c
+++ b/xen/arch/x86/pv/emul-priv-op.c
@@ -960,11 +960,6 @@ static int cf_check read_msr(
 *val = guest_misc_enable(*val);
 return X86EMUL_OKAY;
 
-case MSR_IA32_PERF_CAPABILITIES:
-/* No extra capabilities are supported. */
-*val = 0;
-return X86EMUL_OKAY;
-
 case MSR_P6_PERFCTR(0) ... MSR_P6_PERFCTR_LAST:
 case MSR_P6_EVNTSEL(0) ... MSR_P6_EVNTSEL_LAST:
 case MSR_CORE_PERF_FIXED_CTR0 ... MSR_CORE_PERF_FIXED_CTRn:
diff --git a/xen/include/public/arch-x86/cpufeatureset.h 
b/xen/include/public/arch-x86/cpufeatureset.h
index 6b6ce2745c..0aa3251397 100644
--- a/xen/include/public/arch-x86/cpufeatureset.h
+++ b/xen/include/public/arch-x86/cpufeatureset.h
@@ -118,7 +118,7 @@ XEN_CPUFEATURE(SSSE3, 1*32+ 9) /*A  Supplemental 
Streaming SIMD Extensio
 XEN_CPUFEATURE(FMA,   1*32+12) /*A  Fused Multiply Add */
 XEN_CPUFEATURE(CX16,  1*32+13) /*A  CMPXCHG16B */
 XEN_CPUFEATURE(XTPR,  1*32+14) /*   Send Task Priority Messages */
-XEN_CPUFEATURE(PDCM,  1*32+15) /*   Perf/Debug Capability MSR */
+XEN_CPUFEATURE(PDCM,  1*32+15) /*A  Perf/Debug Capability MSR */
 XEN_CPUFEATURE(PCID,  1*32+17) /*H  Process Context ID */
 XEN_CPUFEATURE(DCA,   1*32+18) /*   Direct Cache Access */
 XEN_CPUFEATURE(SSE4_1,1*32+19) /*A  Streaming SIMD Extensions 4.1 */
-- 
2.41.0




[RFC PATCH 17/22] x86/PMUv4: IA32_PERF_GLOBAL_{STATUS_SET, INUSE} support

2023-10-25 Thread Edwin Török
From: Edwin Török 

Expose these MSRs to the guest when PMU version is >= 4.

Signed-off-by: Edwin Török 
---
 xen/arch/x86/cpu/vpmu_intel.c  | 20 +++-
 xen/arch/x86/hvm/vmx/vmx.c |  5 +
 xen/arch/x86/pv/emul-priv-op.c |  5 +
 3 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/xen/arch/x86/cpu/vpmu_intel.c b/xen/arch/x86/cpu/vpmu_intel.c
index 923fe42a0b..5e660af395 100644
--- a/xen/arch/x86/cpu/vpmu_intel.c
+++ b/xen/arch/x86/cpu/vpmu_intel.c
@@ -171,6 +171,8 @@ static int is_core2_vpmu_msr(u32 msr_index, int *type, int 
*index)
 case MSR_CORE_PERF_GLOBAL_CTRL:
 case MSR_CORE_PERF_GLOBAL_STATUS:
 case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
+case MSR_CORE_PERF_GLOBAL_STATUS_SET:
+case MSR_CORE_PERF_GLOBAL_INUSE:
 *type = MSR_TYPE_GLOBAL;
 return 1;
 
@@ -545,10 +547,21 @@ static int cf_check core2_vpmu_do_wrmsr(unsigned int msr, 
uint64_t msr_content)
 core2_vpmu_cxt->global_status &= ~msr_content;
 wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, msr_content);
 return 0;
+case MSR_CORE_PERF_GLOBAL_STATUS_SET:
+if ( (v->domain->arch.cpuid->basic.pmu_version < 4) ||
+ (msr_content & global_ovf_ctrl_mask) )
+return -EINVAL;
+core2_vpmu_cxt->global_status |= msr_content;
+wrmsrl(MSR_CORE_PERF_GLOBAL_STATUS_SET, msr_content);
+return 0;
 case MSR_CORE_PERF_GLOBAL_STATUS:
 gdprintk(XENLOG_INFO, "Can not write readonly MSR: "
  "MSR_PERF_GLOBAL_STATUS(0x38E)!\n");
 return -EINVAL;
+case MSR_CORE_PERF_GLOBAL_INUSE:
+gdprintk(XENLOG_INFO, "Can not write readonly MSR: "
+ "MSR_PERF_GLOBAL_INUSE(0x392)!\n");
+return -EINVAL;
 case MSR_IA32_PEBS_ENABLE:
 if ( vpmu_features & (XENPMU_FEATURE_IPC_ONLY |
   XENPMU_FEATURE_ARCH_ONLY) )
@@ -688,7 +701,8 @@ static int cf_check core2_vpmu_do_rdmsr(unsigned int msr, 
uint64_t *msr_content)
 core2_vpmu_cxt = vpmu->context;
 switch ( msr )
 {
-case MSR_CORE_PERF_GLOBAL_OVF_CTRL:
+case MSR_CORE_PERF_GLOBAL_OVF_CTRL: /* FALLTHROUGH */
+case MSR_CORE_PERF_GLOBAL_STATUS_SET:
 *msr_content = 0;
 break;
 case MSR_CORE_PERF_GLOBAL_STATUS:
@@ -700,6 +714,10 @@ static int cf_check core2_vpmu_do_rdmsr(unsigned int msr, 
uint64_t *msr_content)
 else
 rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, *msr_content);
 break;
+case MSR_CORE_PERF_GLOBAL_INUSE:
+if ( v->domain->arch.cpuid->basic.pmu_version < 4 )
+return -EINVAL;
+/* FALLTHROUGH */
 default:
 rdmsrl(msr, *msr_content);
 }
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index cd772585fe..af70ed8f30 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -3375,6 +3375,8 @@ static int cf_check vmx_msr_read_intercept(
 case MSR_CORE_PERF_FIXED_CTR_CTRL...MSR_CORE_PERF_GLOBAL_OVF_CTRL:
 case MSR_IA32_PEBS_ENABLE:
 case MSR_IA32_DS_AREA:
+case MSR_CORE_PERF_GLOBAL_STATUS_SET:
+case MSR_CORE_PERF_GLOBAL_INUSE:
 if ( vpmu_do_rdmsr(msr, msr_content) )
 goto gp_fault;
 break;
@@ -3698,6 +3700,9 @@ static int cf_check vmx_msr_write_intercept(
 case MSR_IA32_PEBS_ENABLE:
 case MSR_IA32_DS_AREA:
  if ( vpmu_do_wrmsr(msr, msr_content) )
+case MSR_CORE_PERF_GLOBAL_STATUS_SET:
+case MSR_CORE_PERF_GLOBAL_INUSE:
+ if ( vpmu_do_wrmsr(msr, msr_content) )
 goto gp_fault;
 break;
 
diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c
index 09bfde1060..105485bb1e 100644
--- a/xen/arch/x86/pv/emul-priv-op.c
+++ b/xen/arch/x86/pv/emul-priv-op.c
@@ -968,6 +968,9 @@ static int cf_check read_msr(
 case MSR_P6_EVNTSEL(0) ... MSR_P6_EVNTSEL_LAST:
 case MSR_CORE_PERF_FIXED_CTR0 ... MSR_CORE_PERF_FIXED_CTRn:
 case MSR_CORE_PERF_FIXED_CTR_CTRL ... MSR_CORE_PERF_GLOBAL_OVF_CTRL:
+case MSR_IA32_PEBS_ENABLE:
+case MSR_CORE_PERF_GLOBAL_STATUS_SET:
+case MSR_CORE_PERF_GLOBAL_INUSE:
 if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
 {
 vpmu_msr = true;
@@ -1148,6 +1151,8 @@ static int cf_check write_msr(
 case MSR_P6_EVNTSEL(0) ... MSR_P6_EVNTSEL_LAST:
 case MSR_CORE_PERF_FIXED_CTR0 ... MSR_CORE_PERF_FIXED_CTRn:
 case MSR_CORE_PERF_FIXED_CTR_CTRL ... MSR_CORE_PERF_GLOBAL_OVF_CTRL:
+case MSR_CORE_PERF_GLOBAL_STATUS_SET:
+case MSR_CORE_PERF_GLOBAL_INUSE:
 if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
 {
 vpmu_msr = true;
-- 
2.41.0




[RFC PATCH 10/22] x86/PMUv2: limit number of fixed perf counters to 3

2023-10-25 Thread Edwin Török
From: Edwin Török 

There are only 3 architectural fixed function counters defined,
however Icelake introduces a 4th.
So we'll need to report the number of fixed counters implemented in CPUID
correctly for Icelake; define a macro to ensure we are consistent about
which counter is last.

Note: simply adding MSR_CORE_PERF_FIXED_CTR3 is not enough, Icelake also
defines MSR_PERF_METRICS and there are some ordering constraints on
restoring the MSR, and atomicity constraints on IA32_PERF_GLOBAL_CTRL,
so this is not implemented yet.

Backport: 4.13+

Signed-off-by: Edwin Török 
---
 xen/arch/x86/cpu/vpmu_intel.c| 6 ++
 xen/arch/x86/hvm/vmx/vmx.c   | 4 ++--
 xen/arch/x86/include/asm/msr-index.h | 4 ++--
 xen/arch/x86/pv/emul-priv-op.c   | 4 ++--
 4 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/xen/arch/x86/cpu/vpmu_intel.c b/xen/arch/x86/cpu/vpmu_intel.c
index ef8d69a0d6..4c0776cee7 100644
--- a/xen/arch/x86/cpu/vpmu_intel.c
+++ b/xen/arch/x86/cpu/vpmu_intel.c
@@ -926,6 +926,12 @@ const struct arch_vpmu_ops *__init core2_vpmu_init(void)
 }
 
 fixed_pmc_cnt = core2_get_fixed_pmc_count();
+#define PERF_FIXED_CTR_MAX (MSR_CORE_PERF_FIXED_CTRn - 
MSR_CORE_PERF_FIXED_CTR0 + 1)
+if ( fixed_pmc_cnt > PERF_FIXED_CTR_MAX )
+{
+printk(XENLOG_INFO "VPMU: limiting fixed perf counters to %d\n", 
PERF_FIXED_CTR_MAX);
+fixed_pmc_cnt = PERF_FIXED_CTR_MAX;
+}
 
 if ( cpu_has_pdcm )
 {
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 7d51addf7a..1510e980dd 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -3362,7 +3362,7 @@ static int cf_check vmx_msr_read_intercept(
 /* FALLTHROUGH */
 case MSR_P6_PERFCTR(0)...MSR_P6_PERFCTR_LAST:
 case MSR_P6_EVNTSEL(0)...MSR_P6_EVNTSEL_LAST:
-case MSR_CORE_PERF_FIXED_CTR0...MSR_CORE_PERF_FIXED_CTR2:
+case MSR_CORE_PERF_FIXED_CTR0...MSR_CORE_PERF_FIXED_CTRn:
 case MSR_CORE_PERF_FIXED_CTR_CTRL...MSR_CORE_PERF_GLOBAL_OVF_CTRL:
 case MSR_IA32_PEBS_ENABLE:
 case MSR_IA32_DS_AREA:
@@ -3680,7 +3680,7 @@ static int cf_check vmx_msr_write_intercept(
 
 case MSR_P6_PERFCTR(0)...MSR_P6_PERFCTR_LAST:
 case MSR_P6_EVNTSEL(0)...MSR_P6_EVNTSEL_LAST:
-case MSR_CORE_PERF_FIXED_CTR0...MSR_CORE_PERF_FIXED_CTR2:
+case MSR_CORE_PERF_FIXED_CTR0...MSR_CORE_PERF_FIXED_CTRn:
 case MSR_CORE_PERF_FIXED_CTR_CTRL...MSR_CORE_PERF_GLOBAL_OVF_CTRL:
 case MSR_IA32_PEBS_ENABLE:
 case MSR_IA32_DS_AREA:
diff --git a/xen/arch/x86/include/asm/msr-index.h 
b/xen/arch/x86/include/asm/msr-index.h
index 011a926e0e..8a881a8a6f 100644
--- a/xen/arch/x86/include/asm/msr-index.h
+++ b/xen/arch/x86/include/asm/msr-index.h
@@ -674,8 +674,8 @@
 
 /* Intel Core-based CPU performance counters */
 #define MSR_CORE_PERF_FIXED_CTR0   0x0309
-#define MSR_CORE_PERF_FIXED_CTR1   0x030a
-#define MSR_CORE_PERF_FIXED_CTR2   0x030b
+#define MSR_CORE_PERF_FIXED_CTRn   0x030b
+
 #define MSR_CORE_PERF_FIXED_CTR_CTRL   0x038d
 #define MSR_CORE_PERF_GLOBAL_STATUS0x038e
 #define MSR_CORE_PERF_GLOBAL_CTRL  0x038f
diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c
index 301a70f5ea..a8472fc779 100644
--- a/xen/arch/x86/pv/emul-priv-op.c
+++ b/xen/arch/x86/pv/emul-priv-op.c
@@ -967,7 +967,7 @@ static int cf_check read_msr(
 
 case MSR_P6_PERFCTR(0) ... MSR_P6_PERFCTR_LAST:
 case MSR_P6_EVNTSEL(0) ... MSR_P6_EVNTSEL_LAST:
-case MSR_CORE_PERF_FIXED_CTR0 ... MSR_CORE_PERF_FIXED_CTR2:
+case MSR_CORE_PERF_FIXED_CTR0 ... MSR_CORE_PERF_FIXED_CTRn:
 case MSR_CORE_PERF_FIXED_CTR_CTRL ... MSR_CORE_PERF_GLOBAL_OVF_CTRL:
 if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
 {
@@ -1147,7 +1147,7 @@ static int cf_check write_msr(
 
 case MSR_P6_PERFCTR(0) ... MSR_P6_PERFCTR_LAST:
 case MSR_P6_EVNTSEL(0) ... MSR_P6_EVNTSEL_LAST:
-case MSR_CORE_PERF_FIXED_CTR0 ... MSR_CORE_PERF_FIXED_CTR2:
+case MSR_CORE_PERF_FIXED_CTR0 ... MSR_CORE_PERF_FIXED_CTRn:
 case MSR_CORE_PERF_FIXED_CTR_CTRL ... MSR_CORE_PERF_GLOBAL_OVF_CTRL:
 if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
 {
-- 
2.41.0




[RFC PATCH 06/22] x86/PMUv1: limit arch PMCs to 4 for non-Dom0

2023-10-25 Thread Edwin Török
From: Edwin Török 

Only PERFEVTSEL{0-3} are architectural MSRs and Thread scoped.

PERFEVTSEL{4-7} are Core scoped, and we cannot allow using them if more
than 1 guest can attempt to modify them: if they program them with
different events (quite likely when multiplexing) then one of the VMs
would sample the wrong PMCs.

For now only allow this when Dom0 is the only one using the PMU, i.e. in
vpmu mode `all`.

We could also allow this when sched_gran >= SCHED_GRAN_core, but we
don't have access to the cpupool here.

There is some indication that this was causing bugs, e.g.
`pcm` mentions a bug with perf counters beyond 3 on AWS:
https://github.com/opcm/pcm/commit/02f3b00f304401c723131372e09b71798df613ff

Backport: 4.0+

Signed-off-by: Edwin Török 
---
 xen/arch/x86/cpu/vpmu_intel.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/xen/arch/x86/cpu/vpmu_intel.c b/xen/arch/x86/cpu/vpmu_intel.c
index 9602728f1b..ec9ab01fde 100644
--- a/xen/arch/x86/cpu/vpmu_intel.c
+++ b/xen/arch/x86/cpu/vpmu_intel.c
@@ -926,6 +926,16 @@ const struct arch_vpmu_ops *__init core2_vpmu_init(void)
 }
 
 arch_pmc_cnt = core2_get_arch_pmc_count();
+if ( arch_pmc_cnt > 4 &&
+ vpmu_mode != XENPMU_MODE_ALL )
+{
+/* Architectural PMCs 0-3 are Thread scoped, but 4+ are Core scoped.
+ * We can only allow using them if we know that we have at most one 
guest using a PMU
+ * on all siblings threads on a core. */
+printk(XENLOG_INFO "VPMU: limiting architectural PMCs to 4\n");
+arch_pmc_cnt = 4;
+}
+
 fixed_pmc_cnt = core2_get_fixed_pmc_count();
 
 if ( cpu_has_pdcm )
-- 
2.41.0




[RFC PATCH 08/22] x86/PMUv1: define macro for max number of events

2023-10-25 Thread Edwin Török
From: Edwin Török 

This is needed so we can expose the maximum supported in CPUID,
without cpuid.c and vpmu_intel.c going out of sync.

The macros defined here take a parameter that controls how the enum
values are used: either to generate case statements or to count how many
elements we have.

They are a variation on https://en.wikipedia.org/wiki/X_Macro

No functional change.

Could be backported to 4.13.

Signed-off-by: Edwin Török 
---
 xen/arch/x86/cpu/vpmu_intel.c   | 16 ++--
 xen/arch/x86/cpuid.c|  2 +-
 xen/arch/x86/include/asm/vpmu.h | 27 +++
 3 files changed, 30 insertions(+), 15 deletions(-)

diff --git a/xen/arch/x86/cpu/vpmu_intel.c b/xen/arch/x86/cpu/vpmu_intel.c
index 44a1ed5b10..ef8d69a0d6 100644
--- a/xen/arch/x86/cpu/vpmu_intel.c
+++ b/xen/arch/x86/cpu/vpmu_intel.c
@@ -622,15 +622,7 @@ static int cf_check core2_vpmu_do_wrmsr(unsigned int msr, 
uint64_t msr_content)
 blocked = 1;
 switch ( umaskevent )
 {
-/*
- * See the Pre-Defined Architectural Performance Events table
- * from the Intel 64 and IA-32 Architectures Software
- * Developer's Manual, Volume 3B, System Programming Guide,
- * Part 2.
- */
-case 0x003c:   /* UnHalted Core Cycles */
-case 0x013c:   /* UnHalted Reference Cycles */
-case 0x00c0:   /* Instructions Retired */
+VPMU_IPC_EVENTS(DEFCASE)
 blocked = 0;
 break;
 }
@@ -641,11 +633,7 @@ static int cf_check core2_vpmu_do_wrmsr(unsigned int msr, 
uint64_t msr_content)
 /* Additional counters beyond IPC only; blocked already set. */
 switch ( umaskevent )
 {
-case 0x4f2e:   /* Last Level Cache References */
-case 0x412e:   /* Last Level Cache Misses */
-case 0x00c4:   /* Branch Instructions Retired */
-case 0x00c5:   /* All Branch Mispredict Retired */
-case 0x01a4:   /* Topdown Slots */
+VPMU_ARCH_EVENTS(DEFCASE)
 blocked = 0;
 break;
}
diff --git a/xen/arch/x86/cpuid.c b/xen/arch/x86/cpuid.c
index 51ee89afc4..12e768ae87 100644
--- a/xen/arch/x86/cpuid.c
+++ b/xen/arch/x86/cpuid.c
@@ -328,7 +328,7 @@ void guest_cpuid(const struct vcpu *v, uint32_t leaf,
 
 if ( vpmu_features & (XENPMU_FEATURE_IPC_ONLY |
   XENPMU_FEATURE_ARCH_ONLY) ) {
-unsigned limit = ( vpmu_features & XENPMU_FEATURE_ARCH_ONLY ) 
? 8 : 3;
+unsigned limit = VPMU_IPC_EVENTS_MAX + ( vpmu_features & 
XENPMU_FEATURE_ARCH_ONLY ) ? VPMU_ARCH_EVENTS_MAX : 0;
 if (limit < u.arch_nr) {
 gdprintk(XENLOG_WARNING, "Limiting architectural PMU 
events to %d (actual %d)", limit, u.arch_nr);
 u.arch_nr = limit;
diff --git a/xen/arch/x86/include/asm/vpmu.h b/xen/arch/x86/include/asm/vpmu.h
index 1ef6089ccb..49c3e8c19a 100644
--- a/xen/arch/x86/include/asm/vpmu.h
+++ b/xen/arch/x86/include/asm/vpmu.h
@@ -146,5 +146,32 @@ static inline int vpmu_allocate_context(struct vcpu *v)
 }
 #endif
 
+/*
+ * See "20.2.1.2 Pre-Defined Architectural Performance Events"
+ * from the Intel 64 and IA-32 Architectures Software
+ * Developer's Manual, Volume 3B, System Programming Guide,
+ * Part 2.
+ */
+#define VPMU_IPC_EVENTS(DEF) \
+DEF(0x003c)/* UnHalted Core Cycles */\
+DEF(0x00c0)/* Instructions Retired */\
+DEF(0x013c)/* UnHalted Reference Cycles */\
+
+
+#define VPMU_ARCH_EVENTS(DEF) \
+VPMU_IPC_EVENTS(DEF)\
+DEF(0x4f2e)/* Last Level Cache References */\
+DEF(0x412e)/* Last Level Cache Misses */\
+DEF(0x00c4)/* Branch Instructions Retired */\
+DEF(0x00c5)/* All Branch Mispredict Retired */\
+DEF(0x01a4)/* Topdown Slots */\
+
+#define DEFCASE(x) case (x):
+#define DEFSUM(x) +1
+#define DEFCOUNT(X) (0+X(DEFSUM))
+
+#define VPMU_IPC_EVENTS_MAX DEFCOUNT(VPMU_IPC_EVENTS)
+#define VPMU_ARCH_EVENTS_MAX DEFCOUNT(VPMU_ARCH_EVENTS)
+
 #endif /* __ASM_X86_HVM_VPMU_H_*/
 
-- 
2.41.0




[RFC PATCH 04/22] x86/msr-index: add references to vendor manuals

2023-10-25 Thread Edwin Török
From: Edwin Török 

To more easily lookup the semantics of these MSRs add references to
vendor manuals.

Signed-off-by: Edwin Török 
---
 xen/arch/x86/include/asm/msr-index.h | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/xen/arch/x86/include/asm/msr-index.h 
b/xen/arch/x86/include/asm/msr-index.h
index 2853a276ca..8601f8f426 100644
--- a/xen/arch/x86/include/asm/msr-index.h
+++ b/xen/arch/x86/include/asm/msr-index.h
@@ -13,6 +13,16 @@
  * Blocks of related constants should be sorted by MSR index.  The constant
  * names should be as concise as possible, and the bit names may have an
  * abbreviated name.  Exceptions will be considered on a case-by-case basis.
+ *
+ * References:
+ * - 
https://software.intel.com/content/www/us/en/develop/articles/intel-sdm.html
+ *Intel(R) 64 and IA-32 architectures SDM volume 4: Model-specific 
registers
+ *Chapter 2, "Model-Specific Registers (MSRs)"
+ *
+ * - https://developer.amd.com/resources/developer-guides-manuals/
+ *AMD64 Architecture Programmer’s Manual Volume 2: System Programming
+ *Appendix A "MSR Cross-Reference"
+ *
  */
 
 #define MSR_P5_MC_ADDR  0
-- 
2.41.0




[RFC PATCH 00/22] vPMU bugfixes and support for PMUv5

2023-10-25 Thread Edwin Török
These are a set of patches to improve performance monitoring in Xen.
It starts by fixing MSR_PLATFORM_INFO and making MSR_SMI_COUNT available.
It also allows a pinned Dom0 to read any MSR: there is no reason why this 
shouldn't be allowed, and it prevents having to recompile Xen in order to 
investigate a problem that is difficult to reproduce (e.g. one such MSR is the 
SMI counter).

Then it fixes bugs in each architectural PMU version starting from version 1, 
and introduces support
for new features up to version 5. This was done by reading the Intel SDM for 
each version and checking whether Xen implements the needed features correctly.

AMD is next with fixes for performance counters that were readable but not 
exposed in CPUID, and new support for another performance counter.

There is more to be done here (APERF/MPERF, IBS, temperature monitoring, and I 
haven't even begun approaching uncore counters), but those are difficult to do 
correctly and I've split them out of this series.

The patches were written in 2021, then git segfaulted when rebasing them and 
they got lost in a branch until now.
I've rebased and compile-tested them on latest Xen master, but I haven't done 
any further testing on them yet, hence the RFC prefix to mark these as 
experimental (e.g. I'm not entirely sure whether some changes got lost in the 
cpuid.c to cpu-policy.c rebase or not).

I'm sending this out as an RFC so it is not lost again, and I'll resend a 
proper version once the code has been tested more.
The backport tags attempt to indicate which version of Xen is the oldest one 
that has a bug fixed by the patch, should a backport be desired.
E.g. Xen with vpmu on would claim it supports PMUv3, but doesn't actually 
implement it correctly, so those patches+AMD CPUID fix could be backported.

Edwin Török (22):
  x86/msr: MSR_PLATFORM_INFO shouldn't claim that turbo is programmable
  x86/msr: implement MSR_SMI_COUNT for Dom0 on Intel
  x86/msr: always allow a pinned Dom0 to read any unknown MSR
  x86/msr-index: add references to vendor manuals
  x86/PMUv1: report correct information in 0xa CPUID
  x86/PMUv1: limit arch PMCs to 4 for non-Dom0
  x86/PMUv1: allow topdown slots arch perf event
  x86/PMUv1: define macro for max number of events
  x86/PMUv1: consistently use 8 perf counters in Dom0
  x86/PMUv2: limit number of fixed perf counters to 3
  x86/PMUv2: freeze support in IA32_DEBUGCTL
  x86/msr-index: define more architectural MSRs
  x86/vpmu: expose PDCM and IA32_PERF_CAPABILITIES when vpmu is enabled
  x86/msr: RO MSR_TURBO_RATIO_LIMIT{,1,2}, MSR_TEMPERATURE_TARGET
  x86/VPMU: use macros for max supported VPMU version
  x86/PMUv4: support LBR_Frz and CTR_Frz
  x86/PMUv4: IA32_PERF_GLOBAL_{STATUS_SET, INUSE} support
  x86/PMUv4: disable intercept for PERF_GLOBAL_STATUS
  x86/PMUv4: bump max PMU version to 4
  x86/PMUv5: limit available fixed PMCs and enable support
  x86/AMD: fix CPUID for PerfCtr{4,5}
  x86/AMD: add IRPerf support

 xen/arch/x86/cpu-policy.c   | 32 +--
 xen/arch/x86/cpu/vpmu_intel.c   | 99 ++---
 xen/arch/x86/cpuid.c| 40 -
 xen/arch/x86/hvm/svm/svm.c  |  4 +
 xen/arch/x86/hvm/vmx/vmx.c  | 33 +--
 xen/arch/x86/include/asm/msr-index.h| 79 ++--
 xen/arch/x86/include/asm/vpmu.h | 32 +++
 xen/arch/x86/msr.c  | 22 +
 xen/arch/x86/pv/emul-priv-op.c  | 28 +++---
 xen/include/public/arch-x86/cpufeatureset.h |  6 +-
 xen/include/xen/lib/x86/cpu-policy.h|  5 +-
 11 files changed, 317 insertions(+), 63 deletions(-)

-- 
2.41.0




[RFC PATCH 03/22] x86/msr: always allow a pinned Dom0 to read any unknown MSR

2023-10-25 Thread Edwin Török
From: Edwin Török 

This can be useful if you realize you have to inspect the value of an
MSR in production, without first having to switch to a new Xen build
that handles the MSR.

E.g. SMI count didn't use to be explicitly allowed in the past
(it now is, see a previous commit), but there could be other MSRs that
are useful when tracking down issues.

Backport: 4.15+

Signed-off-by: Edwin Török 
---
 xen/arch/x86/hvm/svm/svm.c | 3 +++
 xen/arch/x86/hvm/vmx/vmx.c | 3 +++
 xen/arch/x86/pv/emul-priv-op.c | 3 +++
 3 files changed, 9 insertions(+)

diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
index 24c417ca71..45f8e1ffd1 100644
--- a/xen/arch/x86/hvm/svm/svm.c
+++ b/xen/arch/x86/hvm/svm/svm.c
@@ -1933,6 +1933,9 @@ static int cf_check svm_msr_read_intercept(
 break;
 
 default:
+if ( is_hwdom_pinned_vcpu(v) && !rdmsr_safe(msr, *msr_content) )
+break;
+
 if ( d->arch.msr_relaxed && !rdmsr_safe(msr, tmp) )
 {
 *msr_content = 0;
diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 1edc7f1e91..f6e5123f66 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -3377,6 +3377,9 @@ static int cf_check vmx_msr_read_intercept(
 if ( vmx_read_guest_msr(curr, msr, msr_content) == 0 )
 break;
 
+if ( is_hwdom_pinned_vcpu(curr) && !rdmsr_safe(msr, *msr_content) )
+return X86EMUL_OKAY;
+
 if ( is_last_branch_msr(msr) )
 {
 *msr_content = 0;
diff --git a/xen/arch/x86/pv/emul-priv-op.c b/xen/arch/x86/pv/emul-priv-op.c
index 0d9f84f458..978ae679a2 100644
--- a/xen/arch/x86/pv/emul-priv-op.c
+++ b/xen/arch/x86/pv/emul-priv-op.c
@@ -985,6 +985,9 @@ static int cf_check read_msr(
 }
 /* fall through */
 default:
+if ( is_hwdom_pinned_vcpu(curr) && !rdmsr_safe(reg, *val) )
+return X86EMUL_OKAY;
+
 if ( currd->arch.msr_relaxed && !rdmsr_safe(reg, tmp) )
 {
 *val = 0;
-- 
2.41.0




[PATCH] tools/ocaml/Makefile.rules: use correct C flags when compiling OCaml C stubs

2023-10-25 Thread Edwin Török
From: Edwin Török 

The code currently uses GCC to compile OCaml C stubs directly,
and although in most cases this works, it is not entirely correct.

This will fail if the OCaml runtime has been recompiled to use and link with 
ASAN for example
(or other situations where a flag needs to be used consistently in everything 
that is linked into the same binary).

Use the OCaml compiler instead, which knows how to invoke the correct C 
compiler with the correct flags,
and append the Xen specific CFLAGS to that instead.

Drop the explicit -fPIC and -I$(ocamlc -where): these will now be provided by 
the compiler as needed.

Use -verbose so we see the actual full C compiler command line invocation done 
by the OCaml compiler.

Signed-off-by: Edwin Török 
---
 tools/ocaml/Makefile.rules | 2 +-
 tools/ocaml/common.make| 2 --
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/tools/ocaml/Makefile.rules b/tools/ocaml/Makefile.rules
index 0d3c6ac839..74856e2282 100644
--- a/tools/ocaml/Makefile.rules
+++ b/tools/ocaml/Makefile.rules
@@ -37,7 +37,7 @@ ALL_OCAML_OBJS ?= $(OBJS)
$(call quiet-command, $(OCAMLYACC) -q $<,MLYACC,$@)
 
 %.o: %.c
-   $(call quiet-command, $(CC) $(CFLAGS) -c -o $@ $<,CC,$@)
+   $(call quiet-command, $(OCAMLOPT) -verbose $(addprefix -ccopt 
,$(CFLAGS)) -c -o $@ $<,CC,$@)
 
 META: META.in
sed 's/@VERSION@/$(VERSION)/g' < $< $o
diff --git a/tools/ocaml/common.make b/tools/ocaml/common.make
index 0c8a597d5b..629e4b3e66 100644
--- a/tools/ocaml/common.make
+++ b/tools/ocaml/common.make
@@ -9,8 +9,6 @@ OCAMLLEX ?= ocamllex
 OCAMLYACC ?= ocamlyacc
 OCAMLFIND ?= ocamlfind
 
-CFLAGS += -fPIC -I$(shell ocamlc -where)
-
 OCAMLOPTFLAG_G := $(shell $(OCAMLOPT) -h 2>&1 | sed -n 's/^  *\(-g\) .*/\1/p')
 OCAMLOPTFLAGS = $(OCAMLOPTFLAG_G) -ccopt "$(LDFLAGS)" -dtypes $(OCAMLINCLUDE) 
-cc $(CC) -w F -warn-error F
 OCAMLCFLAGS += -g $(OCAMLINCLUDE) -w F -warn-error F
-- 
2.41.0




[PATCH] xenstored: do not redirect stderr to /dev/null

2023-10-25 Thread Edwin Török
From: Edwin Török 

By default stderr gets redirected to /dev/null because oxenstored daemonizes 
itself.
This must be a left-over from pre-systemd days.

In ee7815f49f ("tools/oxenstored: Set uncaught exception handler") a workaround 
was added to log exceptions
directly to syslog to cope with standard error being lost.

However it is better to not lose standard error (what if the connection to 
syslog itself fails, how'd we log that?),
and use the '--no-fork' flag to do that.
This flag is supported by both C and O versions of xenstored.

Both versions also call sd_notify so there is no need for forking.

Leave the default daemonize as is so that xenstored keeps working on non-Linux 
systems as before.

Signed-off-by: Edwin Török 
---
 tools/hotplug/Linux/init.d/sysconfig.xencommons.in | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/hotplug/Linux/init.d/sysconfig.xencommons.in 
b/tools/hotplug/Linux/init.d/sysconfig.xencommons.in
index 433e4849af..09a1230cee 100644
--- a/tools/hotplug/Linux/init.d/sysconfig.xencommons.in
+++ b/tools/hotplug/Linux/init.d/sysconfig.xencommons.in
@@ -52,7 +52,7 @@
 # like "--trace-file @XEN_LOG_DIR@/xenstored-trace.log"
 # See "@sbindir@/xenstored --help" for possible options.
 # Only evaluated if XENSTORETYPE is "daemon".
-XENSTORED_ARGS=
+XENSTORED_ARGS=--no-fork
 
 ## Type: string
 ## Default: Not defined, tracing off
-- 
2.41.0




[PATCH] xenctrl_stubs.c: fix NULL dereference

2023-07-13 Thread Edwin Török
From: Edwin Török 

`Tag_cons` is `0` and is meant to be used as the tag argument for 
`caml_alloc`/`caml_alloc_small`
when constructing a non-empty list.
The empty list is `Val_emptylist` instead (which is really just `Val_int(0)`).

Assigning `0` to a list value like this is equivalent to assigning the naked 
pointer `NULL` to the field.
Naked pointers are not valid in OCaml 5, however even in OCaml <5.x any attempt 
to iterate on the list will lead to a segfault.

The list currently only has an opaque type, so no code would have reason to 
iterate on it currently,
but we shouldn't construct invalid OCaml values that might lead to a crash when 
exploring the type.

`Val_emptylist` is available since OCaml 3.01 as a constant.

Fixes: e5ac68a011 ("x86/hvm: Revert per-domain APIC acceleration support")

Signed-off-by: Edwin Török 
---
 tools/ocaml/libs/xc/xenctrl_stubs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/ocaml/libs/xc/xenctrl_stubs.c 
b/tools/ocaml/libs/xc/xenctrl_stubs.c
index e4d9070f2d..3703f48c74 100644
--- a/tools/ocaml/libs/xc/xenctrl_stubs.c
+++ b/tools/ocaml/libs/xc/xenctrl_stubs.c
@@ -832,7 +832,7 @@ CAMLprim value physinfo_arch_caps(const xc_physinfo_t *info)
 
tag = 1; /* tag x86 */
 
-   arch_obj = Tag_cons;
+   arch_obj = Val_emptylist;
 
 #endif
 
-- 
2.41.0




[PATCH v1 0/2] fix memory leaks reported by GCC -fanalyzer

2023-02-24 Thread Edwin Török
From: Edwin Török 

Using GCC 12.2.1 with -fanalyzer it has shown some memory leaks:

This is how I enabled -fanalyzer (adding it to CFLAGS for toplevel
configure didn't seem to work):

```
CFLAGS += $(call cc-option,$(CC),-fanalyzer)
```

Note that there are more errors shown than fixed here, but they seem to
be false positives (which is why this flag cannot, yet, be enabled by
default).

Edwin Török (2):
  xc_core_arch_map_p2m_tree_rw: fix memory leak
  backup_ptes: fix leak on realloc failure

 tools/libs/guest/xg_core_x86.c | 2 ++
 tools/libs/guest/xg_offline_page.c | 7 +--
 2 files changed, 7 insertions(+), 2 deletions(-)

-- 
2.39.1




[PATCH v1 1/2] xc_core_arch_map_p2m_tree_rw: fix memory leak

2023-02-24 Thread Edwin Török
From: Edwin Török 

Prior to bd7a29c3d0 'out' would've always been executed and memory
freed, but that commit changed it such that it returns early and leaks.

Found using gcc 12.2.1 `-fanalyzer`:
```
xg_core_x86.c: In function ‘xc_core_arch_map_p2m_tree_rw’:
xg_core_x86.c:300:5: error: leak of ‘p2m_frame_list_list’ [CWE-401] 
[-Werror=analyzer-malloc-leak]
  300 | return p2m_frame_list;
  | ^~
  ‘xc_core_arch_map_p2m_writable’: events 1-2
|
|  378 | xc_core_arch_map_p2m_writable(xc_interface *xch, struct 
domain_info_context *dinfo, xc_dominfo_t *info,
|  | ^
|  | |
|  | (1) entry to ‘xc_core_arch_map_p2m_writable’
|..
|  381 | return xc_core_arch_map_p2m_rw(xch, dinfo, info, live_shinfo, 
live_p2m, 1);
|  |
~~~
|  ||
|  |(2) calling ‘xc_core_arch_map_p2m_rw’ from 
‘xc_core_arch_map_p2m_writable’
|
+--> ‘xc_core_arch_map_p2m_rw’: events 3-10
   |
   |  319 | xc_core_arch_map_p2m_rw(xc_interface *xch, struct 
domain_info_context *dinfo, xc_dominfo_t *info,
   |  | ^~~
   |  | |
   |  | (3) entry to ‘xc_core_arch_map_p2m_rw’
   |..
   |  328 | if ( xc_domain_nr_gpfns(xch, info->domid, 
&dinfo->p2m_size) < 0 )
   |  |~
   |  ||
   |  |(4) following ‘false’ branch...
   |..
   |  334 | if ( dinfo->p2m_size < info->nr_pages  )
   |  | ~~ ~
   |  | |  |
   |  | |  (6) following ‘false’ branch...
   |  | (5) ...to here
   |..
   |  340 | p2m_cr3 = GET_FIELD(live_shinfo, arch.p2m_cr3, 
dinfo->guest_width);
   |  | ~~~
   |  | |
   |  | (7) ...to here
   |  341 |
   |  342 | p2m_frame_list = p2m_cr3 ? 
xc_core_arch_map_p2m_list_rw(xch, dinfo, dom, live_shinfo, p2m_cr3)
   |  |  
~
   |  343 |  : 
xc_core_arch_map_p2m_tree_rw(xch, dinfo, dom, live_shinfo);
   |  |  

   |  |  | |
   |  |  | (9) ...to here
   |  |  | (10) calling 
‘xc_core_arch_map_p2m_tree_rw’ from ‘xc_core_arch_map_p2m_rw’
   |  |  (8) following ‘false’ branch...
   |
   +--> ‘xc_core_arch_map_p2m_tree_rw’: events 11-24
  |
  |  228 | xc_core_arch_map_p2m_tree_rw(xc_interface *xch, 
struct domain_info_context *dinfo,
  |  | ^~~~
  |  | |
  |  | (11) entry to ‘xc_core_arch_map_p2m_tree_rw’
  |..
  |  245 | if ( !live_p2m_frame_list_list )
  |  |~
  |  ||
  |  |(12) following ‘false’ branch (when 
‘live_p2m_frame_list_list’ is non-NULL)...
  |..
  |  252 | if ( !(p2m_frame_list_list = malloc(PAGE_SIZE)) )
  |  | ~~ ~ ~
  |  | |  | |
  |  | |  | (14) allocated here
  |  | |  (15) assuming ‘p2m_frame_list_list’ is 
non-NULL
  |  | |  (16) following ‘false’ branch (when 
‘p2m_frame_list_list’ is non-NULL)...
  |  | (13) ...to here
  |..
  |  257 | memcpy(p2m_frame_list_list, 
live_p2m_frame_list_list, PAGE_SIZE);
  |  | ~~
  |  | |
  |  | (17) ...to here
  |..
  |  266 | else if ( dinfo->guest_width < sizeof(unsigned 
long) )
  |  | ~
  |  | |
  |  | (18) following ‘false’ branch...
  |..
  |  270 | live_p2m_frame_list =
  |  | ~~~
  |  | |
  |  | (19) ...to here
  |..
  |  275 | if ( !live_p2m_frame_list )
  |  |~
  |  ||
  |  |(20) following ‘false’ branch (w

[PATCH v1 2/2] backup_ptes: fix leak on realloc failure

2023-02-24 Thread Edwin Török
From: Edwin Török 

From `man 2 realloc`:
`If realloc() fails, the original block is left untouched; it is not freed or 
moved.`

Found using GCC -fanalyzer:
```
|  184 | backup->entries = realloc(backup->entries,
|  | ~~
|  | |   | |
|  | |   | (91) when ‘realloc’ fails
|  | |   (92) ‘old_ptes.entries’ leaks here; was 
allocated at (44)
|  | (90) ...to here
```

Signed-off-by: Edwin Török 
---
 tools/libs/guest/xg_offline_page.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/tools/libs/guest/xg_offline_page.c 
b/tools/libs/guest/xg_offline_page.c
index c594fdba41..a8bcea768b 100644
--- a/tools/libs/guest/xg_offline_page.c
+++ b/tools/libs/guest/xg_offline_page.c
@@ -181,10 +181,13 @@ static int backup_ptes(xen_pfn_t table_mfn, int offset,
 
 if (backup->max == backup->cur)
 {
-backup->entries = realloc(backup->entries,
+void* orig = backup->entries;
+backup->entries = realloc(orig,
 backup->max * 2 * sizeof(struct pte_backup_entry));
-if (backup->entries == NULL)
+if (backup->entries == NULL) {
+free(orig);
 return -1;
+}
 else
 backup->max *= 2;
 }
-- 
2.39.1




[PATCH v4 02/11] tools/ocaml/xenstored/Makefile: use ocamldep -sort for linking order

2022-12-16 Thread Edwin Török
A better solution is being worked on for master,
but for now use ocamldep -sort instead of a manually established link
order.
The manually established link order will be wrong when (security)
patches introduce new dependencies between files that would require
changing the link order.

If dune was used as a build system this wouldn't be a problem, but we
can't use Dune yet due to OSSTest, which is stuck on Debian oldstable.

No functional change.

Signed-off-by: Edwin Török 
Acked-by: Christian Lindig 
---
Reason for inclusion in 4.17:

Avoids having to put this patch as a prerequisite into a security update.
Earlier versions of XSA-326 needed this, and although latest version
didn't we don't know whether it might be needed again in the future or not.

Changes since v2:
- new in v3 (was previously emailed to security team though)

Changes since v3:
- add Acked-by line
---
 tools/ocaml/xenstored/Makefile | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tools/ocaml/xenstored/Makefile b/tools/ocaml/xenstored/Makefile
index 6f7333926e..e8aaecf2e6 100644
--- a/tools/ocaml/xenstored/Makefile
+++ b/tools/ocaml/xenstored/Makefile
@@ -72,7 +72,11 @@ XENSTOREDLIBS = \
 PROGRAMS = oxenstored
 
 oxenstored_LIBS = $(XENSTOREDLIBS)
-oxenstored_OBJS = $(OBJS)
+# use ocamldep to figure out link order, otherwise the Makefile would have
+# to be continuously adjusted for security patches that introduce new
+# dependencies between files
+oxenstored_MLSORTED = $(shell $(OCAMLDEP) -sort $(OBJS:=.ml))
+oxenstored_OBJS = $(oxenstored_MLSORTED:.ml=)
 
 OCAML_PROGRAM = oxenstored
 
-- 
2.34.1




[PATCH v4 07/11] fixup! tools/ocaml/xenctrl: OCaml 5 support, fix use-after-free

2022-12-16 Thread Edwin Török
Fix unused value warning.

Fixes: 8b3c06a3e5 ("tools/ocaml/xenctrl: OCaml 5 support, fix use-after-free")
Signed-off-by: Edwin Török 
Cc: Andrew Cooper 

---
Changes:
* new patch
---
 tools/ocaml/libs/xc/xenctrl.ml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/ocaml/libs/xc/xenctrl.ml b/tools/ocaml/libs/xc/xenctrl.ml
index b02be07429..a59dee0938 100644
--- a/tools/ocaml/libs/xc/xenctrl.ml
+++ b/tools/ocaml/libs/xc/xenctrl.ml
@@ -182,7 +182,7 @@ let get_handle () = !handle
 
 let close_handle () =
   match !handle with
-  | Some h -> handle := None
+  | Some _ -> handle := None
   | None -> ()
 
 let with_intf f =
-- 
2.34.1




[PATCH v4 11/11] tools/ocaml/libs/xb: drop Xs_ring.write

2022-12-16 Thread Edwin Török
Unused, only Xs_ring.write_substring is used.
Also the bytes/string conversion here is backwards: the C stub implements the
bytes version and then we use a Bytes.unsafe_of_string to convert a string into
bytes.

However the operation here really is read-only: we read from the string and
write it to the ring, so the C stub should implement the read-only string
version, and if needed we could use Bytes.unsafe_to_string to be able to send
'bytes'. However that is not necessary as the 'bytes' version is dropped above.

Signed-off-by: Edwin Török 
---
Changes:
* new patch
---
 tools/ocaml/libs/xb/xs_ring.ml  | 5 +
 tools/ocaml/libs/xb/xs_ring_stubs.c | 2 +-
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/tools/ocaml/libs/xb/xs_ring.ml b/tools/ocaml/libs/xb/xs_ring.ml
index 2a27aa56c2..87c6b243e4 100644
--- a/tools/ocaml/libs/xb/xs_ring.ml
+++ b/tools/ocaml/libs/xb/xs_ring.ml
@@ -25,14 +25,11 @@ module Server_features = Set.Make(struct
   end)
 
 external read: Xenmmap.mmap_interface -> bytes -> int -> int = 
"ml_interface_read"
-external write: Xenmmap.mmap_interface -> bytes -> int -> int = 
"ml_interface_write"
+external write_substring: Xenmmap.mmap_interface -> string -> int -> int = 
"ml_interface_write"
 
 external _internal_set_server_features: Xenmmap.mmap_interface -> int -> unit 
= "ml_interface_set_server_features" [@@noalloc]
 external _internal_get_server_features: Xenmmap.mmap_interface -> int = 
"ml_interface_get_server_features" [@@noalloc]
 
-let write_substring mmap buff len =
-  write mmap (Bytes.unsafe_of_string buff) len
-
 let get_server_features mmap =
   (* NB only one feature currently defined above *)
   let x = _internal_get_server_features mmap in
diff --git a/tools/ocaml/libs/xb/xs_ring_stubs.c 
b/tools/ocaml/libs/xb/xs_ring_stubs.c
index 28c79ee139..dca6059b0d 100644
--- a/tools/ocaml/libs/xb/xs_ring_stubs.c
+++ b/tools/ocaml/libs/xb/xs_ring_stubs.c
@@ -119,7 +119,7 @@ CAMLprim value ml_interface_write(value ml_interface,
 CAMLlocal1(ml_result);
 
 struct mmap_interface *interface = GET_C_STRUCT(ml_interface);
-const unsigned char *buffer = Bytes_val(ml_buffer);
+const char *buffer = String_val(ml_buffer);
 int len = Int_val(ml_len);
 int result;
 
-- 
2.34.1




[PATCH v4 10/11] tools/ocaml/xenstored: validate config file before live update

2022-12-16 Thread Edwin Török
The configuration file can contain typos or various errors that could prevent
live update from succeeding (e.g. a flag only valid on a different version).
Unknown entries in the config file would normally be ignored on startup, so
add a strict --config-test that live-update can use to check that the config 
file
is valid *for the new binary*.

For compatibility with running old code during live update recognize
--live --help as an equivalent to --config-test.

Signed-off-by: Edwin Török 
---
Changes since v2:
* repost of lost patch from 2021: 
https://patchwork.kernel.org/project/xen-devel/patch/a53934dfa8ef984bffa858cc573cc7a6445bbdc0.1620755942.git.edvin.to...@citrix.com/
---
 tools/ocaml/xenstored/parse_arg.ml | 26 ++
 tools/ocaml/xenstored/xenstored.ml | 11 +--
 2 files changed, 35 insertions(+), 2 deletions(-)

diff --git a/tools/ocaml/xenstored/parse_arg.ml 
b/tools/ocaml/xenstored/parse_arg.ml
index 1a85b14ef5..b159b91f00 100644
--- a/tools/ocaml/xenstored/parse_arg.ml
+++ b/tools/ocaml/xenstored/parse_arg.ml
@@ -26,8 +26,14 @@ type config =
 restart: bool;
 live_reload: bool;
 disable_socket: bool;
+config_test: bool;
   }
 
+let get_config_filename config_file =
+  match config_file with
+  | Some name -> name
+  | None  -> Define.default_config_dir ^ "/oxenstored.conf"
+
 let do_argv =
   let pidfile = ref "" and tracefile = ref "" (* old xenstored compatibility *)
   and domain_init = ref true
@@ -38,6 +44,8 @@ let do_argv =
   and restart = ref false
   and live_reload = ref false
   and disable_socket = ref false
+  and config_test = ref false
+  and help = ref false
   in
 
   let speclist =
@@ -55,10 +63,27 @@ let do_argv =
   ("-T", Arg.Set_string tracefile, ""); (* for compatibility *)
   ("--restart", Arg.Set restart, "Read database on starting");
   ("--live", Arg.Set live_reload, "Read live dump on startup");
+  ("--config-test", Arg.Set config_test, "Test validity of config file");
   ("--disable-socket", Arg.Unit (fun () -> disable_socket := true), 
"Disable socket");
+  ("--help", Arg.Set help, "Display this list of options")
 ] in
   let usage_msg = "usage : xenstored [--config-file ] 
[--no-domain-init] [--help] [--no-fork] [--reraise-top-level] [--restart] 
[--disable-socket]" in
   Arg.parse speclist (fun _ -> ()) usage_msg;
+  let () =
+if !help then begin
+  if !live_reload then
+(*
+  Transform --live --help into --config-test for backward compat with
+  running code during live update.
+  Caller will validate config and exit
+*)
+config_test := true
+  else begin
+Arg.usage_string speclist usage_msg |> print_endline;
+exit 0
+  end
+end
+  in
   {
 domain_init = !domain_init;
 activate_access_log = !activate_access_log;
@@ -70,4 +95,5 @@ let do_argv =
 restart = !restart;
 live_reload = !live_reload;
 disable_socket = !disable_socket;
+config_test = !config_test;
   }
diff --git a/tools/ocaml/xenstored/xenstored.ml 
b/tools/ocaml/xenstored/xenstored.ml
index 366437b396..1aaa3e995e 100644
--- a/tools/ocaml/xenstored/xenstored.ml
+++ b/tools/ocaml/xenstored/xenstored.ml
@@ -88,7 +88,7 @@ let default_pidfile = Paths.xen_run_dir ^ "/xenstored.pid"
 
 let ring_scan_interval = ref 20
 
-let parse_config filename =
+let parse_config ?(strict=false) filename =
   let pidfile = ref default_pidfile in
   let options = [
 ("merge-activate", Config.Set_bool Transaction.do_coalesce);
@@ -129,11 +129,12 @@ let parse_config filename =
 ("xenstored-port", Config.Set_string Domains.xenstored_port); ] in
   begin try Config.read filename options (fun _ _ -> raise Not_found)
 with
-| Config.Error err -> List.iter (fun (k, e) ->
+| Config.Error err as e -> List.iter (fun (k, e) ->
 match e with
 | "unknown key" -> eprintf "config: unknown key %s\n" k
 | _ -> eprintf "config: %s: %s\n" k e
   ) err;
+  if strict then raise e
 | Sys_error m -> eprintf "error: config: %s\n" m;
   end;
   !pidfile
@@ -358,6 +359,12 @@ let tweak_gc () =
 let () =
   Printexc.set_uncaught_exception_handler Logging.fallback_exception_handler;
   let cf = do_argv in
+  if cf.config_test then begin
+let path = config_filename cf in
+let _pidfile:string = parse_config ~strict:true path in
+Printf.printf "Configuration valid at %s\n%!" path;
+exit 0
+  end;
   let pidfile =
 if Sys.file_exists (config_filename cf) then
   parse_config (config_filename cf)
-- 
2.34.1




[PATCH v4 03/11] CODING_STYLE(tools/ocaml): add 'make format' and remove tabs

2022-12-16 Thread Edwin Török
See CODING_STYLE: Xen uses spaces, not tabs.

* OCaml code:

Using `ocp-indent` for now to just make minimal modifications in
tabs vs spaces and get the right indentation.
We can introduce `ocamlformat` later.

* C stubs:

just replace tabs with spaces now, using `indent` or `clang-format`
would change code too much for 4.17.

This avoids perpetuating a formatting style that is inconsistent with
the rest of Xen, and that makes preparing and submitting patches more
difficult (OCaml indentation tools usually only support spaces, not tabs).

No functional change.

Signed-off-by: Edwin Török 

--
Reason for inclusion:
- avoid perpetuating a different coding style (I thought tabs were
  mandated by Xen, and was about to fix up my editor config to match
  when I realized Xen already mandates the use of spaces)
- should make submitting patches for OCaml easier (OCaml indentation
  tools know only about spaces, so I either can't use them, or have to
  manually adjust indentation every time I submit a patch)
- it can be verified that the only change here is the Makefile change
  for the new rule, 'git log -p -1 -w' should be otherwise empty

Changes since v3:
- this didn't make it into 4.17.0, we'll reconsider for 4.17.1, for now
  apply just to master which is open again
- separate introducing the rule from actual reformatting

Cc: Christian Lindig 
---
 tools/ocaml/Makefile | 5 +
 1 file changed, 5 insertions(+)

diff --git a/tools/ocaml/Makefile b/tools/ocaml/Makefile
index a7c04b6546..274ba15d75 100644
--- a/tools/ocaml/Makefile
+++ b/tools/ocaml/Makefile
@@ -34,3 +34,8 @@ build-tools-oxenstored:
$(MAKE) -s -C libs/xb
$(MAKE) -s -C libs/xc
$(MAKE) -C xenstored
+
+.PHONY: format
+format:
+   git ls-files '*.ml' '*.mli' | xargs -n1 ocp-indent -i
+   git ls-files '*.c' '*.h' | xargs -n1 sed -ie 's/\t/    /g'
-- 
2.34.1




[PATCH v4 05/11] CODING-STYLE(tools/ocaml): add .editorconfig to clarify indentation uses spaces

2022-12-16 Thread Edwin Török
Add an .editorconfig to make it easier to keep patches compatible with
Xen's coding style, and to reemphasize what Xen's coding style is.

I thought that Xen demands tabs rather than spaces (which is more
difficult with OCaml because indentation tools use spaces,
and the use of tabs requires changing editor settings),
however CODING-STYLE says it is spaces.

Document this explicitly by adding a .editorconfig file (see editorconfig.org),
which is an editor agnostic format for specifying basic style properties like
indentation, either with native support in editors or via plugins.

It is safer than modelines because it only supports controlling a
restricted set of editor properties and not arbitrary commands as Vim
modelines would have, and works with editors other than Vim too.
(Vim has a deny list for modeline sandboxing, which is error-prone
because every time a new command gets added it needs to be added to the
deny list, which has been the source of a few CVEs in the past
and I disable Vim modelines everywhere as a precaution).

This file is added as a convenience for those who might have an editor
that supports it, and its presence should have no impact on those that
do not (want to) use it.
It also won't cause re-indentation of existing files when edited, only
newly added lines would follow the convention.

No functional change.

Signed-off-by: Edwin Török 
Cc: Christian Lindig 
---
 .editorconfig | 20 
 1 file changed, 20 insertions(+)
 create mode 100644 .editorconfig

diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 00..cb2f27c581
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,20 @@
+# See ./CODING_STYLE
+root = true
+
+[*]
+end_of_line = lf
+indent_style = space
+charset = utf-8
+max_line_length = 79
+trim_trailing_whitespace = true
+insert_final_newline = true
+
+# Makefiles must use tabs, otherwise they don't work
+[{Makefile,*.mk,Makefile.rules}]
+indent_style = tab
+
+[*.{c,h}]
+indent_size = 4
+
+[*.{ml,mli}]
+indent_size = 2
-- 
2.34.1




[PATCH v4 09/11] tools/ocaml/libs/xb: check for unmapped ring before accessing it

2022-12-16 Thread Edwin Török
Xenmmap can unmap the ring, check for this condition before accessing it
to avoid crashing on an unmapped page.

Note that we cannot use the usual OCaml finalizers (like bigarray would) to
perform the unmap, because that might keep a reference count to a foreign
domain's memory that we want to release before destroying the domain.

Signed-off-by: Edwin Török 
---
Changes:
* new patch
---
 tools/ocaml/libs/xb/xs_ring_stubs.c | 11 ---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/tools/ocaml/libs/xb/xs_ring_stubs.c 
b/tools/ocaml/libs/xb/xs_ring_stubs.c
index 1e472d0bbf..28c79ee139 100644
--- a/tools/ocaml/libs/xb/xs_ring_stubs.c
+++ b/tools/ocaml/libs/xb/xs_ring_stubs.c
@@ -35,7 +35,14 @@
 #include 
 #include "mmap_stubs.h"
 
-#define GET_C_STRUCT(a) ((struct mmap_interface *) Data_abstract_val(a))
+static struct mmap_interface* check_addr(struct mmap_interface *interface)
+{
+if (!interface->addr || interface->addr == MAP_FAILED)
+caml_failwith("ring is not mapped");
+return interface;
+}
+
+#define GET_C_STRUCT(a) check_addr((struct mmap_interface *) 
Data_abstract_val(a))
 
 /*
  * Bytes_val has been introduced by Ocaml 4.06.1. So define our own version
@@ -167,8 +174,6 @@ CAMLprim value ml_interface_set_server_features(value 
interface, value v)
 {
 CAMLparam2(interface, v);
 struct xenstore_domain_interface *intf = GET_C_STRUCT(interface)->addr;
-if (intf == (void*)MAP_FAILED)
-caml_failwith("Interface closed");
 
 intf->server_features = Int_val(v);
 
-- 
2.34.1




[PATCH v4 00/11] OCaml fixes

2022-12-16 Thread Edwin Török
From: Edwin Török 

Various OCaml fixes, some of which got Acked already.

Note: the Data_abstract_val got Acked but not yet committed because we thought
there might be a better way with finalizers/etc.
It is not possible to use finalizers here but there is a new followup patch in
this series that detects use-after-free and raises an exception without
triggering undefined behaviour or crashes (see 'xb: check for unmapped ring').

The formatting patch was controversial during the 4.17 freeze, but master is
open now, and both the maintainer and myself agree with it,
so I think we should be able to do it now.
I've split it up into 2: add the formatting rules, and the actual formatting,
so that it is easier to redo the formatting patch when this gets backported.

I think we should just commit it rather than worry that there might be more
bugfixes to come after and attempt to rebase the bugfixes without the
formatting patch (introducing bugs in the process), especially since one of the
patches not yet posted to fix use of enter/leave changes quite a significant
part of all C stubs, and I don't have a way of developing those patches without
the formatting patch.

The rest are various bugfixes to the C bindings, and I have more to send
after I've tested them
(almost all uses of enter/leave blocking section are
wrong because they access OCaml values in non-safe ways with _H,
but the GC may have already moved them which would cause a crash,
that changes a lot of bindings and needs more testing before sending out,
and I've got a few more OCaml 5 fixes too).

Edwin Török (11):
  tools/ocaml/libs/{xb, mmap}: use Data_abstract_val wrapper
  tools/ocaml/xenstored/Makefile: use ocamldep -sort for linking order
  CODING_STYLE(tools/ocaml): add 'make format' and remove tabs
  tools/ocaml: run "make format"
  CODING-STYLE(tools/ocaml): add .editorconfig to clarify indentation
uses spaces
  tools/ocaml: add .clang-format
  fixup! tools/ocaml/xenctrl: OCaml 5 support, fix use-after-free
  tools/ocaml/libs/mmap: mark mmap/munmap as blocking and raise
Unix_error on failure
  tools/ocaml/libs/xb: check for unmapped ring before accessing it
  tools/ocaml/xenstored: validate config file before live update
  tools/ocaml/libs/xb: drop Xs_ring.write

 .editorconfig |   20 +
 tools/ocaml/.clang-format |9 +
 tools/ocaml/Makefile  |5 +
 tools/ocaml/libs/eventchn/xeneventchn_stubs.c |  194 +-
 tools/ocaml/libs/mmap/mmap_stubs.h|8 +-
 tools/ocaml/libs/mmap/xenmmap.ml  |2 +-
 tools/ocaml/libs/mmap/xenmmap.mli |4 +-
 tools/ocaml/libs/mmap/xenmmap_stubs.c |  123 +-
 tools/ocaml/libs/xb/op.ml |   76 +-
 tools/ocaml/libs/xb/packet.ml |   30 +-
 tools/ocaml/libs/xb/partial.ml|   48 +-
 tools/ocaml/libs/xb/xb.ml |  416 ++--
 tools/ocaml/libs/xb/xb.mli|  106 +-
 tools/ocaml/libs/xb/xenbus_stubs.c|   50 +-
 tools/ocaml/libs/xb/xs_ring.ml|   31 +-
 tools/ocaml/libs/xb/xs_ring_stubs.c   |  221 +-
 tools/ocaml/libs/xc/xenctrl.ml|  326 +--
 tools/ocaml/libs/xc/xenctrl.mli   |   12 +-
 tools/ocaml/libs/xc/xenctrl_stubs.c   | 1556 ++---
 tools/ocaml/libs/xentoollog/caml_xentoollog.h |6 +-
 .../ocaml/libs/xentoollog/xentoollog_stubs.c  |  196 +-
 tools/ocaml/libs/xl/xenlight_stubs.c  | 2022 -
 tools/ocaml/libs/xs/queueop.ml|   48 +-
 tools/ocaml/libs/xs/xs.ml |  220 +-
 tools/ocaml/libs/xs/xs.mli|   46 +-
 tools/ocaml/libs/xs/xsraw.ml  |  300 +--
 tools/ocaml/libs/xs/xst.ml|   76 +-
 tools/ocaml/libs/xs/xst.mli   |   20 +-
 tools/ocaml/test/dmesg.ml |   26 +-
 tools/ocaml/test/list_domains.ml  |4 +-
 tools/ocaml/test/raise_exception.ml   |4 +-
 tools/ocaml/test/xtl.ml   |   28 +-
 tools/ocaml/xenstored/Makefile|6 +-
 tools/ocaml/xenstored/config.ml   |  156 +-
 tools/ocaml/xenstored/connection.ml   |  578 ++---
 tools/ocaml/xenstored/connections.ml  |  294 +--
 tools/ocaml/xenstored/disk.ml |  218 +-
 tools/ocaml/xenstored/domain.ml   |  116 +-
 tools/ocaml/xenstored/domains.ml  |  298 +--
 tools/ocaml/xenstored/event.ml|   28 +-
 tools/ocaml/xenstored/history.ml  |   62 +-
 tools/ocaml/xenstored/logging.ml  |  478 ++--
 tools/ocaml/xenstored/packet.ml   |   20 +-
 tools/ocaml/xenstored/parse_arg.ml|  132 +-
 tools/ocaml/xenstored/perms.ml|  216 +-
 tools/ocaml/xenstored/poll.ml |   68 +-
 tools/ocaml/xenstored/poll.mli|4 +-
 t

[PATCH v4 01/11] tools/ocaml/libs/{xb, mmap}: use Data_abstract_val wrapper

2022-12-16 Thread Edwin Török
This is not strictly necessary since it is essentially a no-op
currently: a cast to void* and value*, even in OCaml 5.0.

However it does make it clearer that what we have here is not a regular
OCaml value, but one allocated with Abstract_tag or Custom_tag,
and follows the example from the manual more closely:
https://v2.ocaml.org/manual/intfc.html#ss:c-outside-head

It also makes it clearer that these modules have been reviewed for
compat with OCaml 5.0.

We cannot use OCaml finalizers here, because we want control over when to unmap
these pages from remote domains.
A follow-up commit will add use-after-free detection instead.

No functional change.

Signed-off-by: Edwin Török 
Acked-by: Christian Lindig 
---
Reason for inclusion in 4.17:
- make code follow best practice for upcoming OCaml 5.0 compiler (already in 
beta)

Changes since v2:
- add Acked-by line

Changes since v3:
- mention that use-after-free is fixed in another commit, and we cannot use
  finalizers here
---
 tools/ocaml/libs/mmap/mmap_stubs.h| 4 
 tools/ocaml/libs/mmap/xenmmap_stubs.c | 2 +-
 tools/ocaml/libs/xb/xs_ring_stubs.c   | 2 +-
 3 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/tools/ocaml/libs/mmap/mmap_stubs.h 
b/tools/ocaml/libs/mmap/mmap_stubs.h
index 65e4239890..66f18d4406 100644
--- a/tools/ocaml/libs/mmap/mmap_stubs.h
+++ b/tools/ocaml/libs/mmap/mmap_stubs.h
@@ -30,4 +30,8 @@ struct mmap_interface
int len;
 };
 
+#ifndef Data_abstract_val
+#define Data_abstract_val(x) ((void*)(value*)(x))
+#endif
+
 #endif
diff --git a/tools/ocaml/libs/mmap/xenmmap_stubs.c 
b/tools/ocaml/libs/mmap/xenmmap_stubs.c
index e2ce088e25..141dedb78c 100644
--- a/tools/ocaml/libs/mmap/xenmmap_stubs.c
+++ b/tools/ocaml/libs/mmap/xenmmap_stubs.c
@@ -28,7 +28,7 @@
 #include 
 #include 
 
-#define Intf_val(a) ((struct mmap_interface *) a)
+#define Intf_val(a) ((struct mmap_interface *) Data_abstract_val(a))
 
 static int mmap_interface_init(struct mmap_interface *intf,
int fd, int pflag, int mflag,
diff --git a/tools/ocaml/libs/xb/xs_ring_stubs.c 
b/tools/ocaml/libs/xb/xs_ring_stubs.c
index 7a91fdee75..cc9114029f 100644
--- a/tools/ocaml/libs/xb/xs_ring_stubs.c
+++ b/tools/ocaml/libs/xb/xs_ring_stubs.c
@@ -35,7 +35,7 @@
 #include 
 #include "mmap_stubs.h"
 
-#define GET_C_STRUCT(a) ((struct mmap_interface *) a)
+#define GET_C_STRUCT(a) ((struct mmap_interface *) Data_abstract_val(a))
 
 /*
  * Bytes_val has been introduced by Ocaml 4.06.1. So define our own version
-- 
2.34.1




[PATCH v4 08/11] tools/ocaml/libs/mmap: mark mmap/munmap as blocking and raise Unix_error on failure

2022-12-16 Thread Edwin Török
These functions can potentially take some time,
so allow other OCaml code to proceed meanwhile (if any).

Also raise a Unix_error based on `errno` if `mmap` fails, instead of just
calling failwith (which would lose the error reason).

Signed-off-by: Edwin Török 
---

Changes since v2:
* repost of lost patch from 2020 (posted to ML on 2021
  
https://patchwork.kernel.org/project/xen-devel/patch/294a60be29027d33b0a1d154b7d576237c7dd420.1620755942.git.edvin.to...@citrix.com/)
---
 tools/ocaml/libs/mmap/xenmmap_stubs.c | 15 ---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/tools/ocaml/libs/mmap/xenmmap_stubs.c 
b/tools/ocaml/libs/mmap/xenmmap_stubs.c
index bf864a7c32..5b8c240ef9 100644
--- a/tools/ocaml/libs/mmap/xenmmap_stubs.c
+++ b/tools/ocaml/libs/mmap/xenmmap_stubs.c
@@ -27,6 +27,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 #define Intf_val(a) ((struct mmap_interface *) Data_abstract_val(a))
 
@@ -35,7 +37,9 @@ static int mmap_interface_init(struct mmap_interface *intf,
int len, int offset)
 {
 intf->len = len;
+caml_enter_blocking_section();
 intf->addr = mmap(NULL, len, pflag, mflag, fd, offset);
+caml_leave_blocking_section();
 return (intf->addr == MAP_FAILED) ? errno : 0;
 }
 
@@ -64,17 +68,22 @@ CAMLprim value stub_mmap_init(value fd, value pflag, value 
mflag,
 if (mmap_interface_init(Intf_val(result), Int_val(fd),
 c_pflag, c_mflag,
 Int_val(len), Int_val(offset)))
-caml_failwith("mmap");
+uerror("mmap", Nothing);
 CAMLreturn(result);
 }
 
 CAMLprim value stub_mmap_final(value intf)
 {
 CAMLparam1(intf);
+struct mmap_interface interface = *Intf_val(intf);
 
-if (Intf_val(intf)->addr != MAP_FAILED)
-munmap(Intf_val(intf)->addr, Intf_val(intf)->len);
+/* mark it as freed, in case munmap below fails, so we don't retry it */
 Intf_val(intf)->addr = MAP_FAILED;
+if (interface.addr != MAP_FAILED) {
+caml_enter_blocking_section();
+munmap(interface.addr, interface.len);
+caml_leave_blocking_section();
+}
 
 CAMLreturn(Val_unit);
 }
-- 
2.34.1




[PATCH v4 06/11] tools/ocaml: add .clang-format

2022-12-16 Thread Edwin Török
Add a .clang-format configuration that tries to match CODING_STYLE where
possible.

I was not able to express the special casing of braces after 'do'
though, this can only be controlled generally for all control
statements.
It is imperfect, but should be better than the existing bindings, which
do not follow Xen coding style.

Add this to tools/ocaml first because:
* there are relatively few C files here, and it is a good place to start with
* it'd be useful to make these follow Xen's CODING_STYLE
(which they currently do not because they use tabs for example)
* they change relatively infrequently, so shouldn't cause issues with
  backporting security fixes (could either backport the reindentation
  patch too, or use git cherry-pick with `-Xignore-space-change`)

Does not yet reformat any code.

No functional change.

Signed-off-by: Edwin Török 
Acked-by: Christian Lindig 
---
Changes since v1:
* change commit title to reflect this is for OCaml subtree only
* don't mention stdint.h here, that may be fixed in a different way elsewhere
* add Acked-by line
---
 tools/ocaml/.clang-format | 9 +
 1 file changed, 9 insertions(+)
 create mode 100644 tools/ocaml/.clang-format

diff --git a/tools/ocaml/.clang-format b/tools/ocaml/.clang-format
new file mode 100644
index 00..7ff88ee043
--- /dev/null
+++ b/tools/ocaml/.clang-format
@@ -0,0 +1,9 @@
+BasedOnStyle: GNU
+IndentWidth: 4
+
+# override GNU to match Xen ../../CODING_STYLE more closely
+AlwaysBreakAfterDefinitionReturnType: None
+AlwaysBreakAfterReturnType: None
+SpacesInConditionalStatement: true
+SpaceBeforeParens: ControlStatements
+BreakBeforeBraces: Allman
-- 
2.34.1




[PATCH v2 4/4] tools/ocaml: add .clang-format

2022-12-02 Thread Edwin Török
Add a .clang-format configuration that tries to match CODING_STYLE where
possible.

I was not able to express the special casing of braces after 'do'
though, this can only be controlled generally for all control
statements.
It is imperfect, but should be better than the existing bindings, which
do not follow Xen coding style.

Add this to tools/ocaml first because:
* there are relatively few C files here, and it is a good place to start with
* it'd be useful to make these follow Xen's CODING_STYLE
(which they currently do not because they use tabs for example)
* they change relatively infrequently, so shouldn't cause issues with
  backporting security fixes (could either backport the reindentation
  patch too, or use git cherry-pick with `-Xignore-space-change`)

Does not yet reformat any code.

No functional change.

Signed-off-by: Edwin Török 
Acked-by: Christian Lindig 
---
Changes since v1:
* change commit title to reflect this is for OCaml subtree only
* don't mention stdint.h here, that may be fixed in a different way elsewhere
---
 tools/ocaml/.clang-format | 9 +
 1 file changed, 9 insertions(+)
 create mode 100644 tools/ocaml/.clang-format

diff --git a/tools/ocaml/.clang-format b/tools/ocaml/.clang-format
new file mode 100644
index 00..7ff88ee043
--- /dev/null
+++ b/tools/ocaml/.clang-format
@@ -0,0 +1,9 @@
+BasedOnStyle: GNU
+IndentWidth: 4
+
+# override GNU to match Xen ../../CODING_STYLE more closely
+AlwaysBreakAfterDefinitionReturnType: None
+AlwaysBreakAfterReturnType: None
+SpacesInConditionalStatement: true
+SpaceBeforeParens: ControlStatements
+BreakBeforeBraces: Allman
-- 
2.34.1




[PATCH v2 2/4] tools/ocaml/libs/xc: add binding to xc_evtchn_status

2022-12-02 Thread Edwin Török
There is no API or ioctl to query event channel status, it is only
present in xenctrl.h

The C union is mapped to an OCaml variant exposing just the value from the
correct union tag.

The information provided here is similar to 'lsevtchn', but rather than
parsing its output it queries the underlying API directly.

Signed-off-by: Edwin Török 
---
Changes since v1:
* drop paragraph about where this is used
* add comment about max port
* use Xeneventchn.virq_t instead of int, add a dependency: xc -> eventchn
* initialize struct without memset-ing first
* use 2 CAMLreturn, I found an example in the OCaml stdlib that does that so 
should be future-proof 
https://github.com/ocaml/ocaml/blob/663e8d219f566095e3a9497c5bae07b6a95cae39/otherlibs/unix/dup_win32.c#L52-L77
* use Tag_some, defining it if needed
* fix typo on failwith
---
 tools/ocaml/libs/Makefile   |  2 +-
 tools/ocaml/libs/xc/META.in |  2 +-
 tools/ocaml/libs/xc/Makefile|  2 +-
 tools/ocaml/libs/xc/xenctrl.ml  | 15 +++
 tools/ocaml/libs/xc/xenctrl.mli | 15 +++
 tools/ocaml/libs/xc/xenctrl_stubs.c | 67 +
 6 files changed, 100 insertions(+), 3 deletions(-)

diff --git a/tools/ocaml/libs/Makefile b/tools/ocaml/libs/Makefile
index 7e7c27e2d5..15f45a6d66 100644
--- a/tools/ocaml/libs/Makefile
+++ b/tools/ocaml/libs/Makefile
@@ -4,7 +4,7 @@ include $(XEN_ROOT)/tools/Rules.mk
 SUBDIRS= \
mmap \
xentoollog \
-   xc eventchn \
+   eventchn xc\
xb xs xl
 
 .PHONY: all
diff --git a/tools/ocaml/libs/xc/META.in b/tools/ocaml/libs/xc/META.in
index 2ff4dcb6bf..6a273936a3 100644
--- a/tools/ocaml/libs/xc/META.in
+++ b/tools/ocaml/libs/xc/META.in
@@ -1,5 +1,5 @@
 version = "@VERSION@"
 description = "Xen Control Interface"
-requires = "unix,xenmmap"
+requires = "unix,xenmmap,xeneventchn"
 archive(byte) = "xenctrl.cma"
 archive(native) = "xenctrl.cmxa"
diff --git a/tools/ocaml/libs/xc/Makefile b/tools/ocaml/libs/xc/Makefile
index 3b76e9ad7b..1d9fecb06e 100644
--- a/tools/ocaml/libs/xc/Makefile
+++ b/tools/ocaml/libs/xc/Makefile
@@ -4,7 +4,7 @@ include $(OCAML_TOPLEVEL)/common.make
 
 CFLAGS += -I../mmap $(CFLAGS_libxenctrl) $(CFLAGS_libxenguest)
 CFLAGS += $(APPEND_CFLAGS)
-OCAMLINCLUDE += -I ../mmap
+OCAMLINCLUDE += -I ../mmap -I ../eventchn
 
 OBJS = xenctrl
 INTF = xenctrl.cmi
diff --git a/tools/ocaml/libs/xc/xenctrl.ml b/tools/ocaml/libs/xc/xenctrl.ml
index 2ed7454b16..5dac47991e 100644
--- a/tools/ocaml/libs/xc/xenctrl.ml
+++ b/tools/ocaml/libs/xc/xenctrl.ml
@@ -267,6 +267,21 @@ external evtchn_alloc_unbound: handle -> domid -> domid -> 
int
   = "stub_xc_evtchn_alloc_unbound"
 external evtchn_reset: handle -> domid -> unit = "stub_xc_evtchn_reset"
 
+(* FIFO has theoretical maximum of 2^28 ports, fits in an int *)
+type evtchn_interdomain = { dom: domid; port: int}
+
+type evtchn_stat =
+  | EVTCHNSTAT_unbound of domid
+  | EVTCHNSTAT_interdomain of evtchn_interdomain
+  | EVTCHNSTAT_pirq of int
+  | EVTCHNSTAT_virq of Xeneventchn.virq_t
+  | EVTCHNSTAT_ipi
+
+type evtchn_status = { vcpu: int; status: evtchn_stat }
+
+external evtchn_status: handle -> domid -> int -> evtchn_status option =
+  "stub_xc_evtchn_status"
+
 external readconsolering: handle -> string = "stub_xc_readconsolering"
 
 external send_debug_keys: handle -> string -> unit = "stub_xc_send_debug_keys"
diff --git a/tools/ocaml/libs/xc/xenctrl.mli b/tools/ocaml/libs/xc/xenctrl.mli
index 0f80aafea0..6c9206bc74 100644
--- a/tools/ocaml/libs/xc/xenctrl.mli
+++ b/tools/ocaml/libs/xc/xenctrl.mli
@@ -206,6 +206,21 @@ external shadow_allocation_get : handle -> domid -> int
 external evtchn_alloc_unbound : handle -> domid -> domid -> int
   = "stub_xc_evtchn_alloc_unbound"
 external evtchn_reset : handle -> domid -> unit = "stub_xc_evtchn_reset"
+
+type evtchn_interdomain = { dom: domid; port: int}
+
+type evtchn_stat =
+  | EVTCHNSTAT_unbound of domid
+  | EVTCHNSTAT_interdomain of evtchn_interdomain
+  | EVTCHNSTAT_pirq of int
+  | EVTCHNSTAT_virq of Xeneventchn.virq_t
+  | EVTCHNSTAT_ipi
+
+type evtchn_status = { vcpu: int; status: evtchn_stat }
+
+external evtchn_status: handle -> domid -> int -> evtchn_status option =
+  "stub_xc_evtchn_status"
+
 external readconsolering : handle -> string = "stub_xc_readconsolering"
 external send_debug_keys : handle -> string -> unit = "stub_xc_send_debug_keys"
 external physinfo : handle -> physinfo = "stub_xc_physinfo"
diff --git a/tools/ocaml/libs/xc/xenctrl_stubs.c 
b/tools/ocaml/libs/xc/xenctrl_stubs.c
index d30585f21c..a492ea17fd 100644
--- a/tools/ocaml/libs/xc/xenctrl_stubs.c
+++ b/tools/ocaml/libs/xc/xenctrl_stubs.c
@@ -44,6 +44,10 @@
 #define Val_none (Val_int(0))
 #endif
 
+#ifndef Tag_some
+#define Tag_some 0
+

[PATCH v2 1/4] CODING-STYLE: add .editorconfig to clarify indentation uses spaces

2022-12-02 Thread Edwin Török
Add an .editorconfig to make it easier to keep patches compatible with
Xen's coding style, and to reemphasize what Xen's coding style is.

I thought that Xen demands tabs rather than spaces (which is more
difficult with OCaml because indentation tools use spaces,
and the use of tabs requires changing editor settings),
however CODING-STYLE says it is spaces.

Document this explicitly by adding a .editorconfig file (see editorconfig.org),
which is an editor agnostic format for specifying basic style properties like
indentation, either with native support in editors or via plugins.

It is safer than modelines because it only supports controlling a
restricted set of editor properties and not arbitrary commands as Vim
modelines would have, and works with editors other than Vim too.
(Vim has a deny list for modeline sandboxing, which is error-prone
because every time a new command gets added it needs to be added to the
deny list, which has been the source of a few CVEs in the past
and I disable Vim modelines everywhere as a precaution).

This file is added as a convenience for those who might have an editor
that supports it, and its presence should have no impact on those that
do not (want to) use it.
It also won't cause re-indentation of existing files when edited, only
newly added lines would follow the convention.

No functional change.

Signed-off-by: Edwin Török 
---
 .editorconfig | 20 
 1 file changed, 20 insertions(+)
 create mode 100644 .editorconfig

diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 00..cb2f27c581
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,20 @@
+# See ./CODING_STYLE
+root = true
+
+[*]
+end_of_line = lf
+indent_style = space
+charset = utf-8
+max_line_length = 79
+trim_trailing_whitespace = true
+insert_final_newline = true
+
+# Makefiles must use tabs, otherwise they don't work
+[{Makefile,*.mk,Makefile.rules}]
+indent_style = tab
+
+[*.{c,h}]
+indent_size = 4
+
+[*.{ml,mli}]
+indent_size = 2
-- 
2.34.1




[PATCH v2 3/4] tools/ocaml/libs/xc: add hvm_param_get binding

2022-12-02 Thread Edwin Török
Not to be confused with hvm_get_param, which also exists and has a
different, more error-prone interface.

This one always returns a 64-bit value, and that is retained in the
OCaml binding as well, returning 'int64' (and not int, or nativeint
which might have a different size).

The integer here is unsigned in the C API, however OCaml only has signed 
integers.

No bits are lost, it is just a matter of interpretation when printing
and for certain arithmetic operations, however in the cases where the
MSB is set it is very likely that the value is an address and no
arithmetic should be performed on the OCaml side on it.
(this is not a new problem with this binding, but worth mentioning given
the difference in types)

Signed-off-by: Edwin Török 
---
Changes since v1:
* drop accidental extra numbers in variant names
* use 'val' instead of 'result' for local var
* add binding for hvm_param_set
---
 tools/ocaml/libs/xc/xenctrl.ml  | 47 
 tools/ocaml/libs/xc/xenctrl.mli | 48 +
 tools/ocaml/libs/xc/xenctrl_stubs.c | 32 +++
 3 files changed, 127 insertions(+)

diff --git a/tools/ocaml/libs/xc/xenctrl.ml b/tools/ocaml/libs/xc/xenctrl.ml
index 5dac47991e..370dac3fc8 100644
--- a/tools/ocaml/libs/xc/xenctrl.ml
+++ b/tools/ocaml/libs/xc/xenctrl.ml
@@ -299,6 +299,53 @@ external map_foreign_range: handle -> domid -> int
   -> nativeint -> Xenmmap.mmap_interface
   = "stub_map_foreign_range"
 
+type hvm_param =
+  | HVM_PARAM_CALLBACK_IRQ
+  | HVM_PARAM_STORE_PFN
+  | HVM_PARAM_STORE_EVTCHN
+  | HVM_PARAM_UNDEF_3
+  | HVM_PARAM_PAE_ENABLED
+  | HVM_PARAM_IOREQ_PFN
+  | HVM_PARAM_BUFIOREQ_PFN
+  | HVM_PARAM_UNDEF_7
+  | HVM_PARAM_UNDEF_8
+  | HVM_PARAM_VIRIDIAN
+  | HVM_PARAM_TIMER_MODE0
+  | HVM_PARAM_HPET_ENABLED1
+  | HVM_PARAM_IDENT_PT2
+  | HVM_PARAM_UNDEF_13
+  | HVM_PARAM_ACPI_S_STATE
+  | HVM_PARAM_VM86_TSS
+  | HVM_PARAM_VPT_ALIGN
+  | HVM_PARAM_CONSOLE_PFN
+  | HVM_PARAM_CONSOLE_EVTCHN
+  | HVM_PARAM_ACPI_IOPORTS_LOCATION
+  | HVM_PARAM_MEMORY_EVENT_CR0
+  | HVM_PARAM_MEMORY_EVENT_CR3
+  | HVM_PARAM_MEMORY_EVENT_CR4
+  | HVM_PARAM_MEMORY_EVENT_INT3
+  | HVM_PARAM_NESTEDHVM
+  | HVM_PARAM_MEMORY_EVENT_SINGLE_STEP
+  | HVM_PARAM_UNDEF_26
+  | HVM_PARAM_PAGING_RING_PFN
+  | HVM_PARAM_MONITOR_RING_PFN
+  | HVM_PARAM_SHARING_RING_PFN
+  | HVM_PARAM_MEMORY_EVENT_MSR
+  | HVM_PARAM_TRIPLE_FAULT_REASON
+  | HVM_PARAM_IOREQ_SERVER_PFN
+  | HVM_PARAM_NR_IOREQ_SERVER_PAGES
+  | HVM_PARAM_VM_GENERATION_ID_ADDR
+  | HVM_PARAM_ALTP2M
+  | HVM_PARAM_X87_FIP_WIDTH6
+  | HVM_PARAM_VM86_TSS_SIZED
+  | HVM_PARAM_MCA_CAP
+
+external hvm_param_get: handle -> domid -> hvm_param -> int64
+  = "stub_xc_hvm_param_get"
+
+external hvm_param_set: handle -> domid -> hvm_param -> int64 -> unit
+  = "stub_xc_hvm_param_set"
+
 external domain_assign_device: handle -> domid -> (int * int * int * int) -> 
unit
   = "stub_xc_domain_assign_device"
 external domain_deassign_device: handle -> domid -> (int * int * int * int) -> 
unit
diff --git a/tools/ocaml/libs/xc/xenctrl.mli b/tools/ocaml/libs/xc/xenctrl.mli
index 6c9206bc74..e18d5cddb7 100644
--- a/tools/ocaml/libs/xc/xenctrl.mli
+++ b/tools/ocaml/libs/xc/xenctrl.mli
@@ -236,6 +236,54 @@ external map_foreign_range :
   handle -> domid -> int -> nativeint -> Xenmmap.mmap_interface
   = "stub_map_foreign_range"
 
+(* needs to be sorted according to its numeric value, watch out for gaps! *)
+type hvm_param =
+  | HVM_PARAM_CALLBACK_IRQ
+  | HVM_PARAM_STORE_PFN
+  | HVM_PARAM_STORE_EVTCHN
+  | HVM_PARAM_UNDEF_3
+  | HVM_PARAM_PAE_ENABLED
+  | HVM_PARAM_IOREQ_PFN
+  | HVM_PARAM_BUFIOREQ_PFN
+  | HVM_PARAM_UNDEF_7
+  | HVM_PARAM_UNDEF_8
+  | HVM_PARAM_VIRIDIAN
+  | HVM_PARAM_TIMER_MODE0
+  | HVM_PARAM_HPET_ENABLED1
+  | HVM_PARAM_IDENT_PT2
+  | HVM_PARAM_UNDEF_13
+  | HVM_PARAM_ACPI_S_STATE
+  | HVM_PARAM_VM86_TSS
+  | HVM_PARAM_VPT_ALIGN
+  | HVM_PARAM_CONSOLE_PFN
+  | HVM_PARAM_CONSOLE_EVTCHN
+  | HVM_PARAM_ACPI_IOPORTS_LOCATION
+  | HVM_PARAM_MEMORY_EVENT_CR0
+  | HVM_PARAM_MEMORY_EVENT_CR3
+  | HVM_PARAM_MEMORY_EVENT_CR4
+  | HVM_PARAM_MEMORY_EVENT_INT3
+  | HVM_PARAM_NESTEDHVM
+  | HVM_PARAM_MEMORY_EVENT_SINGLE_STEP
+  | HVM_PARAM_UNDEF_26
+  | HVM_PARAM_PAGING_RING_PFN
+  | HVM_PARAM_MONITOR_RING_PFN
+  | HVM_PARAM_SHARING_RING_PFN
+  | HVM_PARAM_MEMORY_EVENT_MSR
+  | HVM_PARAM_TRIPLE_FAULT_REASON
+  | HVM_PARAM_IOREQ_SERVER_PFN
+  | HVM_PARAM_NR_IOREQ_SERVER_PAGES
+  | HVM_PARAM_VM_GENERATION_ID_ADDR
+  | HVM_PARAM_ALTP2M
+  | HVM_PARAM_X87_FIP_WIDTH6
+  | HVM_PARAM_VM86_TSS_SIZED
+  | HVM_PARAM_MCA_CAP
+
+external hvm_param_get: handle -> domid -> hvm_param -> int64
+  = "stub_xc_hvm_param_get"
+
+external hvm_param_set: handle -> domid -> hvm_param -> int64 -> unit
+  = "stub_xc_hvm_param_set"
+
 external domain_assign_device: handle -> domid -> (int 

[PATCH v2 0/4] OCaml bindings for hvm_param_get and xc_evtchn_status

2022-12-02 Thread Edwin Török
Changes since v1:
* dropped stdint.h patch, still being discussed on where to best fix it
* addressed review comments (see individual patches' changes section)

Edwin Török (4):
  CODING-STYLE: add .editorconfig to clarify indentation uses spaces
  tools/ocaml/libs/xc: add binding to xc_evtchn_status
  tools/ocaml/libs/xc: add hvm_param_get binding
  tools/ocaml: add .clang-format

 .editorconfig   | 20 ++
 tools/ocaml/.clang-format   |  9 +++
 tools/ocaml/libs/Makefile   |  2 +-
 tools/ocaml/libs/xc/META.in |  2 +-
 tools/ocaml/libs/xc/Makefile|  2 +-
 tools/ocaml/libs/xc/xenctrl.ml  | 62 ++
 tools/ocaml/libs/xc/xenctrl.mli | 63 ++
 tools/ocaml/libs/xc/xenctrl_stubs.c | 99 +
 8 files changed, 256 insertions(+), 3 deletions(-)
 create mode 100644 .editorconfig
 create mode 100644 tools/ocaml/.clang-format

-- 
2.34.1




[PATCH v1 2/5] tools/ocaml/libs/xc: add binding to xc_evtchn_status

2022-11-30 Thread Edwin Török
There is no API or ioctl to query event channel status, it is only
present in xenctrl.h

The C union is mapped to an OCaml variant exposing just the value from the
correct union tag.

Querying event channel status is useful when analyzing Windows VMs that
may have reset and changed the xenstore event channel port number from
what it initially got booted with.
The information provided here is similar to 'lsevtchn', but rather than
parsing its output it queries the underlying API directly.

Signed-off-by: Edwin Török 
---
 tools/ocaml/libs/xc/xenctrl.ml  | 14 +++
 tools/ocaml/libs/xc/xenctrl.mli | 15 +++
 tools/ocaml/libs/xc/xenctrl_stubs.c | 65 +
 3 files changed, 94 insertions(+)

diff --git a/tools/ocaml/libs/xc/xenctrl.ml b/tools/ocaml/libs/xc/xenctrl.ml
index 2ed7454b16..c21e391f98 100644
--- a/tools/ocaml/libs/xc/xenctrl.ml
+++ b/tools/ocaml/libs/xc/xenctrl.ml
@@ -267,6 +267,20 @@ external evtchn_alloc_unbound: handle -> domid -> domid -> 
int
   = "stub_xc_evtchn_alloc_unbound"
 external evtchn_reset: handle -> domid -> unit = "stub_xc_evtchn_reset"
 
+type evtchn_interdomain = { dom: domid; port: int}
+
+type evtchn_stat =
+  | EVTCHNSTAT_unbound of domid
+  | EVTCHNSTAT_interdomain of evtchn_interdomain
+  | EVTCHNSTAT_pirq of int
+  | EVTCHNSTAT_virq of int
+  | EVTCHNSTAT_ipi
+
+type evtchn_status = { vcpu: int; status: evtchn_stat }
+
+external evtchn_status: handle -> domid -> int -> evtchn_status option =
+  "stub_xc_evtchn_status"
+
 external readconsolering: handle -> string = "stub_xc_readconsolering"
 
 external send_debug_keys: handle -> string -> unit = "stub_xc_send_debug_keys"
diff --git a/tools/ocaml/libs/xc/xenctrl.mli b/tools/ocaml/libs/xc/xenctrl.mli
index 0f80aafea0..60e7902e66 100644
--- a/tools/ocaml/libs/xc/xenctrl.mli
+++ b/tools/ocaml/libs/xc/xenctrl.mli
@@ -206,6 +206,21 @@ external shadow_allocation_get : handle -> domid -> int
 external evtchn_alloc_unbound : handle -> domid -> domid -> int
   = "stub_xc_evtchn_alloc_unbound"
 external evtchn_reset : handle -> domid -> unit = "stub_xc_evtchn_reset"
+
+type evtchn_interdomain = { dom: domid; port: int}
+
+type evtchn_stat =
+  | EVTCHNSTAT_unbound of domid
+  | EVTCHNSTAT_interdomain of evtchn_interdomain
+  | EVTCHNSTAT_pirq of int
+  | EVTCHNSTAT_virq of int
+  | EVTCHNSTAT_ipi
+
+type evtchn_status = { vcpu: int; status: evtchn_stat }
+
+external evtchn_status: handle -> domid -> int -> evtchn_status option =
+  "stub_xc_evtchn_status"
+
 external readconsolering : handle -> string = "stub_xc_readconsolering"
 external send_debug_keys : handle -> string -> unit = "stub_xc_send_debug_keys"
 external physinfo : handle -> physinfo = "stub_xc_physinfo"
diff --git a/tools/ocaml/libs/xc/xenctrl_stubs.c 
b/tools/ocaml/libs/xc/xenctrl_stubs.c
index d30585f21c..67f3648391 100644
--- a/tools/ocaml/libs/xc/xenctrl_stubs.c
+++ b/tools/ocaml/libs/xc/xenctrl_stubs.c
@@ -641,6 +641,71 @@ CAMLprim value stub_xc_evtchn_reset(value xch, value domid)
 CAMLreturn(Val_unit);
 }
 
+CAMLprim value stub_xc_evtchn_status(value xch, value domid, value port)
+{
+CAMLparam3(xch, domid, port);
+CAMLlocal4(result, result_status, stat, interdomain);
+xc_evtchn_status_t status;
+int rc;
+
+memset(, 0, sizeof(status));
+status.dom = _D(domid);
+status.port = Int_val(port);
+
+caml_enter_blocking_section();
+rc = xc_evtchn_status(_H(xch), );
+caml_leave_blocking_section();
+
+if ( rc < 0 )
+failwith_xc(_H(xch));
+
+if ( status.status == EVTCHNSTAT_closed )
+result = Val_none;
+else
+{
+switch ( status.status )
+{
+case EVTCHNSTAT_unbound:
+stat = caml_alloc(1, 0); /* 1st non-constant constructor */
+Store_field(stat, 0, Val_int(status.u.unbound.dom));
+break;
+
+case EVTCHNSTAT_interdomain:
+interdomain = caml_alloc_tuple(2);
+Store_field(interdomain, 0, Val_int(status.u.interdomain.dom));
+Store_field(interdomain, 1, Val_int(status.u.interdomain.port));
+stat = caml_alloc(1, 1); /*  2nd non-constant constructor */
+Store_field(stat, 0, interdomain);
+break;
+case EVTCHNSTAT_pirq:
+stat = caml_alloc(1, 2); /* 3rd non-constant constructor */
+Store_field(stat, 0, Val_int(status.u.pirq));
+break;
+
+case EVTCHNSTAT_virq:
+stat = caml_alloc(1, 3); /* 4th non-constant constructor */
+Store_field(stat, 0, Val_int(status.u.virq));
+break;
+
+case EVTCHNSTAT_ipi:
+stat = Val_int(0); /* 1st constant constructor */
+break;
+
+default:
+caml_failwith("Unkown evtchn stat

[PATCH v1 0/5] OCaml bindings for hvm_param_get and xc_evtchn_status

2022-11-30 Thread Edwin Török
Add bindings to xc_evtchn_status and hvm_param_get, useful for xenopsd
and for recovery from failed live updates.

.editorconfig helps me format the source code with the desired Xen
coding style (now that the reindent patch has switched it to spaces as
desired by the Xen project).
If you don't have an editor set up to use editorconfig this is a no-op.

.clang-format is an experiment for the OCaml subtree in slowly moving
its code to be closer to the Xen coding style. There is no Xen coding
style as such in clang-format, this takes GNU as a base and tweaks it to
be as close to CODING_STYLE as possible (there is just one difference in
handling of do/while as far as I can tell).
It should be an improvement over the current situation where the OCaml C
bindings do not follow Xen coding style, and further bindings added that
follow the style of the code around them would not follow it either.
It doesn't yet reformat anything with it, just allows someone that
submits patches to use it if desired (e.g. on new code).

Edwin Török (5):
  CODING-STYLE: add .editorconfig to clarify indentation uses spaces
  tools/ocaml/libs/xc: add binding to xc_evtchn_status
  tools/ocaml/libs/xc: add hvm_param_get binding
  tools/ocaml/libs/xb: add missing stdint.h
  CODING_STYLE: add .clang-format

 .editorconfig   | 20 +++
 tools/ocaml/.clang-format   |  9 
 tools/ocaml/libs/xb/xenbus_stubs.c  |  1 +
 tools/ocaml/libs/xc/xenctrl.ml  | 58 +
 tools/ocaml/libs/xc/xenctrl.mli | 60 +
 tools/ocaml/libs/xc/xenctrl_stubs.c | 81 +
 6 files changed, 229 insertions(+)
 create mode 100644 .editorconfig
 create mode 100644 tools/ocaml/.clang-format

-- 
2.34.1




[PATCH v1 4/5] tools/ocaml/libs/xb: add missing stdint.h

2022-11-30 Thread Edwin Török
xs_wire.h fails to compile without this, and a slight rearrangement of
header includes (e.g. by clang-format) could cause the file to fail to
compile.

Be more robust and include the needed header file.
---
 tools/ocaml/libs/xb/xenbus_stubs.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/ocaml/libs/xb/xenbus_stubs.c 
b/tools/ocaml/libs/xb/xenbus_stubs.c
index e5206f64d4..ce6d33b23e 100644
--- a/tools/ocaml/libs/xb/xenbus_stubs.c
+++ b/tools/ocaml/libs/xb/xenbus_stubs.c
@@ -15,6 +15,7 @@
  */
 
 #include 
+#include 
 #include 
 #include 
 #include 
-- 
2.34.1




[PATCH v1 1/5] CODING-STYLE: add .editorconfig to clarify indentation uses spaces

2022-11-30 Thread Edwin Török
Add an .editorconfig to make it easier to keep patches compatible with
Xen's coding style, and to reemphasize what Xen's coding style is.

I thought that Xen demands tabs rather than spaces (which is more
difficult with OCaml because indentation tools use spaces,
and the use of tabs requires changing editor settings),
however CODING-STYLE says it is spaces.

Document this explicitly by adding a .editorconfig file (see editorconfig.org),
which is an editor agnostic format for specifying basic style properties like
indentation, either with native support in editors or via plugins.

It is safer than modelines because it only supports controlling a
restricted set of editor properties and not arbitrary commands as Vim
modelines would have, and works with editors other than Vim too.
(Vim has a deny list for modeline sandboxing, which is error-prone
because every time a new command gets added it needs to be added to the
deny list, which has been the source of a few CVEs in the past
and I disable Vim modelines everywhere as a precaution).

This file is added as a convenience for those who might have an editor
that supports it, and its presence should have no impact on those that
do not (want to) use it.
It also won't cause re-indentation of existing files when edited, only
newly added lines would follow the convention.

No functional change.

Signed-off-by: Edwin Török 
---
 .editorconfig | 20 
 1 file changed, 20 insertions(+)
 create mode 100644 .editorconfig

diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 00..cb2f27c581
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,20 @@
+# See ./CODING_STYLE
+root = true
+
+[*]
+end_of_line = lf
+indent_style = space
+charset = utf-8
+max_line_length = 79
+trim_trailing_whitespace = true
+insert_final_newline = true
+
+# Makefiles must use tabs, otherwise they don't work
+[{Makefile,*.mk,Makefile.rules}]
+indent_style = tabs
+
+[*.{c,h}]
+indent_size = 4
+
+[*.{ml,mli}]
+indent_size = 2
-- 
2.34.1




[PATCH v1 3/5] tools/ocaml/libs/xc: add hvm_param_get binding

2022-11-30 Thread Edwin Török
Not to be confused with hvm_get_param, which also exists and has a
different, more error-prone interface.

This one always returns a 64-bit value, and that is retained in the
OCaml binding as well, returning 'int64' (and not int, or nativeint
which might have a different size).

The integer here is unsigned in the C API, however OCaml only has signed 
integers.

No bits are lost, it is just a matter of interpretation when printing
and for certain arithmetic operations, however in the cases where the
MSB is set it is very likely that the value is an address and no
arithmetic should be performed on the OCaml side on it.
(this is not a new problem with this binding, but worth mentioning given
the difference in types)

Signed-off-by: Edwin Török 
---
 tools/ocaml/libs/xc/xenctrl.ml  | 44 
 tools/ocaml/libs/xc/xenctrl.mli | 45 +
 tools/ocaml/libs/xc/xenctrl_stubs.c | 16 ++
 3 files changed, 105 insertions(+)

diff --git a/tools/ocaml/libs/xc/xenctrl.ml b/tools/ocaml/libs/xc/xenctrl.ml
index c21e391f98..1f8d927b0c 100644
--- a/tools/ocaml/libs/xc/xenctrl.ml
+++ b/tools/ocaml/libs/xc/xenctrl.ml
@@ -298,6 +298,50 @@ external map_foreign_range: handle -> domid -> int
   -> nativeint -> Xenmmap.mmap_interface
   = "stub_map_foreign_range"
 
+type hvm_param =
+  | HVM_PARAM_CALLBACK_IRQ
+  | HVM_PARAM_STORE_PFN
+  | HVM_PARAM_STORE_EVTCHN
+  | HVM_PARAM_UNDEFINED_3
+  | HVM_PARAM_PAE_ENABLED
+  | HVM_PARAM_IOREQ_PFN
+  | HVM_PARAM_BUFIOREQ_PFN
+  | HVM_PARAM_UNDEFINED_7
+  | HVM_PARAM_UNDEFINED_8
+  | HVM_PARAM_VIRIDIAN
+  | HVM_PARAM_TIMER_MODE0
+  | HVM_PARAM_HPET_ENABLED1
+  | HVM_PARAM_IDENT_PT2
+  | HVM_PARAM_UNDEFINED_13
+  | HVM_PARAM_ACPI_S_STATE4
+  | HVM_PARAM_VM86_TSS5
+  | HVM_PARAM_VPT_ALIGN6
+  | HVM_PARAM_CONSOLE_PFN7
+  | HVM_PARAM_CONSOLE_EVTCHN8
+  | HVM_PARAM_ACPI_IOPORTS_LOCATION9
+  | HVM_PARAM_MEMORY_EVENT_CR00
+  | HVM_PARAM_MEMORY_EVENT_CR31
+  | HVM_PARAM_MEMORY_EVENT_CR42
+  | HVM_PARAM_MEMORY_EVENT_INT33
+  | HVM_PARAM_NESTEDHVM4
+  | HVM_PARAM_MEMORY_EVENT_SINGLE_STEP5
+  | HVM_PARAM_UNDEFINED_26
+  | HVM_PARAM_PAGING_RING_PFN7
+  | HVM_PARAM_MONITOR_RING_PFN8
+  | HVM_PARAM_SHARING_RING_PFN9
+  | HVM_PARAM_MEMORY_EVENT_MSR0
+  | HVM_PARAM_TRIPLE_FAULT_REASON1
+  | HVM_PARAM_IOREQ_SERVER_PFN2
+  | HVM_PARAM_NR_IOREQ_SERVER_PAGES3
+  | HVM_PARAM_VM_GENERATION_ID_ADDR4
+  | HVM_PARAM_ALTP2M5
+  | HVM_PARAM_X87_FIP_WIDTH6
+  | HVM_PARAM_VM86_TSS_SIZED7
+  | HVM_PARAM_MCA_CAP8
+
+external hvm_param_get: handle -> domid -> hvm_param -> int64
+  = "stub_xc_hvm_param_get"
+
 external domain_assign_device: handle -> domid -> (int * int * int * int) -> 
unit
   = "stub_xc_domain_assign_device"
 external domain_deassign_device: handle -> domid -> (int * int * int * int) -> 
unit
diff --git a/tools/ocaml/libs/xc/xenctrl.mli b/tools/ocaml/libs/xc/xenctrl.mli
index 60e7902e66..f6c7e5b553 100644
--- a/tools/ocaml/libs/xc/xenctrl.mli
+++ b/tools/ocaml/libs/xc/xenctrl.mli
@@ -236,6 +236,51 @@ external map_foreign_range :
   handle -> domid -> int -> nativeint -> Xenmmap.mmap_interface
   = "stub_map_foreign_range"
 
+(* needs to be sorted according to its numeric value, watch out for gaps! *)
+type hvm_param =
+  | HVM_PARAM_CALLBACK_IRQ
+  | HVM_PARAM_STORE_PFN
+  | HVM_PARAM_STORE_EVTCHN
+  | HVM_PARAM_UNDEFINED_3
+  | HVM_PARAM_PAE_ENABLED
+  | HVM_PARAM_IOREQ_PFN
+  | HVM_PARAM_BUFIOREQ_PFN
+  | HVM_PARAM_UNDEFINED_7
+  | HVM_PARAM_UNDEFINED_8
+  | HVM_PARAM_VIRIDIAN
+  | HVM_PARAM_TIMER_MODE0
+  | HVM_PARAM_HPET_ENABLED1
+  | HVM_PARAM_IDENT_PT2
+  | HVM_PARAM_UNDEFINED_13
+  | HVM_PARAM_ACPI_S_STATE4
+  | HVM_PARAM_VM86_TSS5
+  | HVM_PARAM_VPT_ALIGN6
+  | HVM_PARAM_CONSOLE_PFN7
+  | HVM_PARAM_CONSOLE_EVTCHN8
+  | HVM_PARAM_ACPI_IOPORTS_LOCATION9
+  | HVM_PARAM_MEMORY_EVENT_CR00
+  | HVM_PARAM_MEMORY_EVENT_CR31
+  | HVM_PARAM_MEMORY_EVENT_CR42
+  | HVM_PARAM_MEMORY_EVENT_INT33
+  | HVM_PARAM_NESTEDHVM4
+  | HVM_PARAM_MEMORY_EVENT_SINGLE_STEP5
+  | HVM_PARAM_UNDEFINED_26
+  | HVM_PARAM_PAGING_RING_PFN7
+  | HVM_PARAM_MONITOR_RING_PFN8
+  | HVM_PARAM_SHARING_RING_PFN9
+  | HVM_PARAM_MEMORY_EVENT_MSR0
+  | HVM_PARAM_TRIPLE_FAULT_REASON1
+  | HVM_PARAM_IOREQ_SERVER_PFN2
+  | HVM_PARAM_NR_IOREQ_SERVER_PAGES3
+  | HVM_PARAM_VM_GENERATION_ID_ADDR4
+  | HVM_PARAM_ALTP2M5
+  | HVM_PARAM_X87_FIP_WIDTH6
+  | HVM_PARAM_VM86_TSS_SIZED7
+  | HVM_PARAM_MCA_CAP8
+
+external hvm_param_get: handle -> domid -> hvm_param -> int64
+  = "stub_xc_hvm_param_get"
+
 external domain_assign_device: handle -> domid -> (int * int * int * int) -> 
unit
   = "stub_xc_domain_assign_device"
 external domain_deassign_device: handle -> domid -> (int * int * int * int) -> 
unit
diff --git a/tools/ocaml/libs/xc/xenctrl_stubs.c 
b/tools/ocaml/libs/xc/xenctrl_stubs.c
index 67f3648391..b2df93d4f8 100644
--- a/tools/ocaml/libs/

[PATCH v1 5/5] CODING_STYLE: add .clang-format

2022-11-30 Thread Edwin Török
Add a .clang-format configuration that tries to match CODING_STYLE where
possible.

I was not able to express the special casing of braces after 'do'
though, this can only be controlled generally for all control
statements.
It is imperfect, but should be better than the existing bindings, which
do not follow Xen coding style.

Add this to tools/ocaml first because:
* there are relatively few C files here, and it is a good place to start with
* it'd be useful to make these follow Xen's CODING_STYLE
(which they currently do not because they use tabs for example)
* they change relatively infrequently, so shouldn't cause issues with
  backporting security fixes (could either backport the reindentation
  patch too, or use git cherry-pick with `-Xignore-space-change`)

Once this is used it'll need inserting some '#include <stdint.h>', otherwise
xs_wire.h fails to compile due to the missing uint32_t define.

Does not yet reformat any code.

No functional change.

Signed-off-by: Edwin Török 
---
 tools/ocaml/.clang-format | 9 +
 1 file changed, 9 insertions(+)
 create mode 100644 tools/ocaml/.clang-format

diff --git a/tools/ocaml/.clang-format b/tools/ocaml/.clang-format
new file mode 100644
index 00..7ff88ee043
--- /dev/null
+++ b/tools/ocaml/.clang-format
@@ -0,0 +1,9 @@
+BasedOnStyle: GNU
+IndentWidth: 4
+
+# override GNU to match Xen ../../CODING_STYLE more closely
+AlwaysBreakAfterDefinitionReturnType: None
+AlwaysBreakAfterReturnType: None
+SpacesInConditionalStatement: true
+SpaceBeforeParens: ControlStatements
+BreakBeforeBraces: Allman
-- 
2.34.1




[[PATCH for-4.17 v1]] tools/ocaml/xenstored/xenstored.ml: fix incorrect scope

2022-11-10 Thread Edwin Török
A debug statement got introduced and code not reindented
(as it was part of a security fix and was trying to avoid that),
however that resulted in *only* the debug statement being part of the 'if',
and everything else outside of it.
This results in some unnecessary ring checks for domains which otherwise
have IO credit.

Remove the debug line.

Fixes: 42f0581a91 ("tools/oxenstored: Implement live update for socket 
connections")

Signed-off-by: Edwin Török 
---
Reason for inclusion in 4.17:
- bugfix for commit already in master

Changes since v3:
- new in v4
---
 tools/ocaml/xenstored/xenstored.ml | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tools/ocaml/xenstored/xenstored.ml 
b/tools/ocaml/xenstored/xenstored.ml
index ffd43a4eee..79f04178d8 100644
--- a/tools/ocaml/xenstored/xenstored.ml
+++ b/tools/ocaml/xenstored/xenstored.ml
@@ -476,7 +476,6 @@ let _ =
let ring_scan_checker dom =
(* no need to scan domains already marked as for processing *)
if not (Domain.get_io_credit dom > 0) then
-   debug "Looking up domid %d" (Domain.get_id dom);
let con = Connections.find_domain cons (Domain.get_id 
dom) in
if not (Connection.has_more_work con) then (
Process.do_output store cons domains con;
-- 
2.34.1




[PATCH] docs/process/sending-patches.pandoc: add a tip about using --trailer to collect Acked-by tags

2022-11-09 Thread Edwin Török
This can be useful when you are preparing a series for resend and want to
collect any Acked-by/etc. tags from the mailing list.
It avoids duplicate entries, and puts the tag in the correct place
(even if the commit has a --- line), and you get to see the result
before committing (unless you use --no-edit).

Signed-off-by: Edwin Török 
---
 docs/process/sending-patches.pandoc | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/docs/process/sending-patches.pandoc 
b/docs/process/sending-patches.pandoc
index 2e74c3b57e..90ac7b6cf1 100644
--- a/docs/process/sending-patches.pandoc
+++ b/docs/process/sending-patches.pandoc
@@ -72,6 +72,12 @@ after the patch was written.
 Do not split a tag across multiple lines, tags are exempt from the
 "wrap at 75 columns" rule in order to simplify parsing scripts.
 
+Tags can be added on the command-line in the appropriate place by using
+the `--trailer` flag (on a recent enough version of git), e.g.:
+```
+git commit --amend --trailer 'Acked-by: Maintainer '
+```
+
 ### Origin:
 
 Xen has inherited some source files from other open source projects. In case
-- 
2.34.1




[PATCH for-4.17 v3 10/15] tools/ocaml/xenstored: keep eventchn FD open across live update

2022-11-08 Thread Edwin Török
It has been discovered that the Windows PV drivers rebind their local
port of the xenstore evtchn post migrate,
which changes the eventchn port from e.g.
the default of 3 (nVCPUs+1) to 1 without oxenstored knowing about it.
And oxenstored goes and tries to rebind port 3, which is already in use
by something else.

The guest also writes to HVMPARAM, however there is a race condition
between it resetting the eventchn port and writing to HVMPARAM, thus it
is not possible to atomically obtain the guest's eventchn port for
xenstore.

It is possible to recover such a guest manually from a failed live update by
running introduceDomain from Dom0 with the correct port number obtained
using 'xen-vmdebug <domid> hvmparam'.

There is also a small race condition window between closing the evtchn
FD and reopening it where guest drivers would get an unbound evtchn port
error from Xen when trying to talk to xenstore during live update.

Avoid this by trying to keep the eventchn file descriptor open across
live updates, using the new xenevtchn_fdopen API, similar to how C
xenstored works.

However an old enough oxenstored won't have kept the evtchn fd open and
won't have saved it in the update stream, so handle that case by
reopening the evtchn as a fallback.

A followup commit will avoid rebinding the guest remote port (we cannot
rely on the remote port staying the same, e.g. the windows PV drivers
change it)

Signed-off-by: Edwin Török 
---
Reason for inclusion in 4.17:
- fixes live update in oxenstored, making future security updates easier
  if the base code already has this patch

Changes since v2:
- new in v3
---
 tools/ocaml/xenstored/domains.ml   |  1 +
 tools/ocaml/xenstored/event.ml |  7 ++-
 tools/ocaml/xenstored/xenstored.ml | 71 +-
 3 files changed, 56 insertions(+), 23 deletions(-)

diff --git a/tools/ocaml/xenstored/domains.ml b/tools/ocaml/xenstored/domains.ml
index a36b531663..d5c452d26c 100644
--- a/tools/ocaml/xenstored/domains.ml
+++ b/tools/ocaml/xenstored/domains.ml
@@ -56,6 +56,7 @@ let exist doms id = Hashtbl.mem doms.table id
 let find doms id = Hashtbl.find doms.table id
 let number doms = Hashtbl.length doms.table
 let iter doms fct = Hashtbl.iter (fun _ b -> fct b) doms.table
+let eventchn doms = doms.eventchn
 
 let rec is_empty_queue q =
   Queue.is_empty q ||
diff --git a/tools/ocaml/xenstored/event.ml b/tools/ocaml/xenstored/event.ml
index b10027f004..190ca6fcbf 100644
--- a/tools/ocaml/xenstored/event.ml
+++ b/tools/ocaml/xenstored/event.ml
@@ -20,7 +20,12 @@ type t = {
   mutable virq_port: Xeneventchn.t option;
 }
 
-let init () = { handle = Xeneventchn.init (); virq_port = None; }
+let init ?fd () =
+  let handle = match fd with
+| None -> Xeneventchn.init ~cloexec:false ()
+| Some fd -> Xeneventchn.fdopen fd
+  in
+  { handle; virq_port = None }
 let fd eventchn = Xeneventchn.fd eventchn.handle
 let bind_dom_exc_virq eventchn = eventchn.virq_port <- Some 
(Xeneventchn.bind_dom_exc_virq eventchn.handle)
 let bind_interdomain eventchn domid port = Xeneventchn.bind_interdomain 
eventchn.handle domid port
diff --git a/tools/ocaml/xenstored/xenstored.ml 
b/tools/ocaml/xenstored/xenstored.ml
index 34612814e1..cdd5b5ac67 100644
--- a/tools/ocaml/xenstored/xenstored.ml
+++ b/tools/ocaml/xenstored/xenstored.ml
@@ -144,7 +144,7 @@ module DB = struct
 
   let dump_format_header = "$xenstored-dump-format"
 
-  let from_channel_f chan global_f socket_f domain_f watch_f store_f =
+  let from_channel_f chan global_f event_f socket_f domain_f watch_f store_f =
 let unhexify s = Utils.unhexify s in
 let getpath s =
   let u = Utils.unhexify s in
@@ -165,6 +165,8 @@ module DB = struct
 (* there might be more parameters here,
e.g. a RO socket from a previous version: ignore it *)
 global_f ~rw
+  | "eventchnfd" :: eventfd :: [] ->
+event_f ~eventfd
   | "socket" :: fd :: [] ->
 socket_f ~fd:(int_of_string fd)
   | "dom" :: domid :: mfn :: port :: []->
@@ -189,10 +191,27 @@ module DB = struct
 done;
 info "Completed loading xenstore dump"
 
-  let from_channel store cons doms chan =
+  let from_channel store cons createdoms chan =
 (* don't let the permission get on our way, full perm ! *)
 let op = Store.get_ops store Perms.Connection.full_rights in
 let rwro = ref (None) in
+let eventchnfd = ref (None) in
+let doms = ref (None) in
+
+let require_doms () =
+  match !doms with
+  | None ->
+let missing_eventchnfd = !eventchnfd = None in
+if missing_eventchnfd then
+  warn "No event channel file descriptor available in dump!";
+let eventchn = Event.init ?fd:!eventchnfd () in
+let domains = createdoms eventchn in
+if missing_eventchnfd then
+  Event.bind_dom_exc_virq eventchn;
+doms := Some domai

[PATCH for-4.17 v3 14/15] tools/ocaml/xenstored/syslog_stubs.c: avoid potential NULL dereference

2022-11-08 Thread Edwin Török
If we are out of memory then strdup may return NULL, and passing NULL to
syslog may cause a crash.

Avoid this by using `caml_stat_strdup` which will raise an OCaml out of
memory exception instead.
This then needs to be paired with caml_stat_free.

Signed-off-by: Edwin Török 
---
Reason for inclusion in 4.17:
- fixes a bug in out of memory situations

Changes since v2:
- new in v3
---
 tools/ocaml/xenstored/syslog_stubs.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/tools/ocaml/xenstored/syslog_stubs.c 
b/tools/ocaml/xenstored/syslog_stubs.c
index 4e5e49b557..4ad85c8eb5 100644
--- a/tools/ocaml/xenstored/syslog_stubs.c
+++ b/tools/ocaml/xenstored/syslog_stubs.c
@@ -14,6 +14,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -35,14 +36,16 @@ static int __syslog_facility_table[] = {
 value stub_syslog(value facility, value level, value msg)
 {
 CAMLparam3(facility, level, msg);
-const char *c_msg = strdup(String_val(msg));
+char *c_msg = strdup(String_val(msg));
 int c_facility = __syslog_facility_table[Int_val(facility)]
| __syslog_level_table[Int_val(level)];
 
+if ( !c_msg )
+caml_raise_out_of_memory();
 caml_enter_blocking_section();
 syslog(c_facility, "%s", c_msg);
 caml_leave_blocking_section();
 
-free((void*)c_msg);
+free(c_msg);
 CAMLreturn(Val_unit);
 }
-- 
2.34.1




[PATCH for-4.17 v3 12/15] tools/ocaml/xenstored: log live update issues at warning level

2022-11-08 Thread Edwin Török
During live update oxenstored tries a best effort approach to recover as
many domains and information as possible even if it encounters errors
restoring some domains.
This defensive approach has already proven useful to recover domains
after a bug in oxenstored.

However the default log level is warning, which means that problems during live
update were not logged at all, unless you've changed oxenstored.conf
prior to live-update.

Signed-off-by: Edwin Török 
---
Reason for inclusion in 4.17:
- makes live update issues debuggable (live update can be used to apply
  security updates to 4.17)

Changes since v2:
- new in v3
---
 tools/ocaml/xenstored/xenstored.ml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/ocaml/xenstored/xenstored.ml 
b/tools/ocaml/xenstored/xenstored.ml
index d4ff24b440..78177b116f 100644
--- a/tools/ocaml/xenstored/xenstored.ml
+++ b/tools/ocaml/xenstored/xenstored.ml
@@ -184,9 +184,9 @@ module DB = struct
   (Perms.Node.of_string (unhexify perms ^ "\000"))
   (unhexify value)
   | _ ->
-info "restoring: ignoring unknown line: %s" line
+warn "restoring: ignoring unknown line: %s" line
 with exn ->
-  info "restoring: ignoring unknown line: %s (exception: %s)"
+  warn "restoring: ignoring unknown line: %s (exception: %s)"
 line (Printexc.to_string exn);
   ()
   with End_of_file ->
-- 
2.34.1




[PATCH for-4.17 v3 11/15] tools/ocaml/xenstored: do not rebind event channels after live update

2022-11-08 Thread Edwin Török
See explanation in previous commit.
This introduces a new field into the live update stream to retain both
ports, and handles the missing value in a backward compatible way.

Signed-off-by: Edwin Török 
---
Reason for inclusion in 4.17:
- fixes a bug in oxenstored live update, needed to make live updates
  with future 4.17 security fixes possible/more reliable

Changes since v2:
- new in v3
---
 tools/ocaml/libs/eventchn/xeneventchn.ml  |  5 +
 tools/ocaml/libs/eventchn/xeneventchn.mli |  5 +
 tools/ocaml/xenstored/domain.ml   |  6 +-
 tools/ocaml/xenstored/domains.ml  | 13 +
 tools/ocaml/xenstored/event.ml|  1 +
 tools/ocaml/xenstored/xenstored.ml| 11 +++
 6 files changed, 32 insertions(+), 9 deletions(-)

diff --git a/tools/ocaml/libs/eventchn/xeneventchn.ml 
b/tools/ocaml/libs/eventchn/xeneventchn.ml
index 34f7391f5e..7ccf7a99f0 100644
--- a/tools/ocaml/libs/eventchn/xeneventchn.ml
+++ b/tools/ocaml/libs/eventchn/xeneventchn.ml
@@ -43,6 +43,11 @@ type virq_t =
 
 external notify: handle -> int -> unit = "stub_eventchn_notify"
 external bind_interdomain: handle -> int -> int -> int = 
"stub_eventchn_bind_interdomain"
+
+let restore_interdomain handle _domid _remote_port local_port =
+  notify handle local_port;
+  local_port
+
 external bind_virq: handle -> virq_t -> int = "stub_eventchn_bind_virq"
 let bind_dom_exc_virq handle = bind_virq handle Dom_exc
 external unbind: handle -> int -> unit = "stub_eventchn_unbind"
diff --git a/tools/ocaml/libs/eventchn/xeneventchn.mli 
b/tools/ocaml/libs/eventchn/xeneventchn.mli
index 3965b29621..7407031b9e 100644
--- a/tools/ocaml/libs/eventchn/xeneventchn.mli
+++ b/tools/ocaml/libs/eventchn/xeneventchn.mli
@@ -68,6 +68,11 @@ val bind_interdomain : handle -> int -> int -> t
 channel connected to domid:remote_port. On error it will
 throw a Failure exception. *)
 
+val restore_interdomain : handle -> int -> int -> int -> t
+(** [restore_interdomain h domid remote_port local_port] returns a local event
+channel connected to domid:remote_port. On error it will
+throw a Failure exception. *)
+
 val bind_dom_exc_virq : handle -> t
 (** Binds a local event channel to the VIRQ_DOM_EXC
 (domain exception VIRQ). On error it will throw a Failure
diff --git a/tools/ocaml/xenstored/domain.ml b/tools/ocaml/xenstored/domain.ml
index 4e62a48e8e..5dad298614 100644
--- a/tools/ocaml/xenstored/domain.ml
+++ b/tools/ocaml/xenstored/domain.ml
@@ -61,7 +61,7 @@ let string_of_port = function
   | Some x -> string_of_int (Xeneventchn.to_int x)
 
 let dump d chan =
-  fprintf chan "dom,%d,%nd,%d\n" d.id d.mfn d.remote_port
+  fprintf chan "dom,%d,%nd,%d,%s\n" d.id d.mfn d.remote_port (string_of_port 
d.port)
 
 let notify dom = match dom.port with
   | None ->
@@ -77,6 +77,10 @@ let bind_interdomain dom =
   dom.port <- Some (Event.bind_interdomain dom.eventchn dom.id 
dom.remote_port);
   debug "bound domain %d remote port %d to local port %s" dom.id 
dom.remote_port (string_of_port dom.port)
 
+let restore_interdomain dom localport =
+  assert (dom.port = None);
+  dom.port <- Some (Event.restore_interdomain dom.eventchn dom.id 
dom.remote_port localport);
+  debug "restored interdomain %d remote port %d to local port %s" dom.id 
dom.remote_port (string_of_port dom.port)
 
 let close dom =
   debug "domain %d unbound port %s" dom.id (string_of_port dom.port);
diff --git a/tools/ocaml/xenstored/domains.ml b/tools/ocaml/xenstored/domains.ml
index d5c452d26c..af9fecf2f7 100644
--- a/tools/ocaml/xenstored/domains.ml
+++ b/tools/ocaml/xenstored/domains.ml
@@ -123,17 +123,22 @@ let cleanup doms =
 let resume _doms _domid =
   ()
 
-let create doms domid mfn port =
+let maybe_bind_interdomain restore_localport dom =
+  match restore_localport with
+  | None -> Domain.bind_interdomain dom
+  | Some p -> Domain.restore_interdomain dom p
+
+let create doms domid mfn ?restore_localport port =
   let interface = Xenctrl.map_foreign_range xc domid (Xenmmap.getpagesize()) 
mfn in
   let dom = Domain.make domid mfn port interface doms.eventchn in
   Hashtbl.add doms.table domid dom;
-  Domain.bind_interdomain dom;
+  maybe_bind_interdomain restore_localport dom;
   dom
 
 let xenstored_kva = ref ""
 let xenstored_port = ref ""
 
-let create0 doms =
+let create0 ?restore_localport doms =
   let port, interface =
 (
   let port = Utils.read_file_single_integer !xenstored_port
@@ -147,7 +152,7 @@ let create0 doms =
   in
   let dom = Domain.make 0 Nativeint.zero port interface doms.eventchn in
   Hashtbl.add doms.table 0 dom;
-  Domain.bind_interdomain dom;
+  maybe_bind_interdomain restore_localport dom;
   Domain.notify dom;
   dom
 
diff --git a/tools/ocaml/xenstored/event.ml b/tools/ocaml/xenstored/event.ml
index 190ca6fcbf..3de

[PATCH for-4.17 v3 15/15] tools/ocaml/libs/xc: fix use of uninitialized memory in shadow_allocation_get

2022-11-08 Thread Edwin Török
It was noticed in 2013 that shadow allocation sometimes returns the
wrong value, which got worked around by adding a limit to the shadow
multiplier of 1000 and ignoring the value from Xen in that case
to avoid a shadow multiplier causing a VM to request 6PB of memory for
example:
https://github.com/xapi-project/xen-api/pull/1215/commits/be55a8c30b41d1cd7596fc100ab1cfd3539f74eb

However that is just a workaround, and I've just reproduced this by
killing a VM mid migration, which resulted in a shadow multiplier of
629.42, rendering the VM unbootable even after a host reboot.

The real bug is in Xen: when a VM is dying it will return '0' for paging
op domctls and log a message at info level
'Ignoring paging op on dying domain', which leaves the 'mb' parameter
uninitialized upon return from the domctl.

The binding also doesn't initialize the 'c->mb' parameter (it is meant
to be used only when setting, not when querying the allocation),
which results in the VM getting a shadow allocation (and thus multiplier)
set based on what value happened to be currently on the stack.

Explicitly initialize the value passed to the domctl, and detect the 
uninitialized
case (shadow allocation of 0), and raise an exception in that case.
The exception will cause xenopsd to skip setting the shadow multiplier.

Note that the behaviour of Xen here is inconsistent between x86 and ARM:
ARM would return EINVAL when it gets a paging op on a dying domain,
and X86-64 would return 0 with possibly uninitialized data.

It might be desirable to change the x86 path in the hypervisor to return
EINVAL, although that would require more testing in case it breaks
something.
But the bindings should be defensive anyway against bugs like this.

Signed-off-by: Edwin Török 
---
Reason for inclusion in 4.17:
- fixes a long-standing (>9y old) bug that is still happening today

Changes since v2:
- new in v3
---
 tools/ocaml/libs/xc/xenctrl_stubs.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tools/ocaml/libs/xc/xenctrl_stubs.c 
b/tools/ocaml/libs/xc/xenctrl_stubs.c
index e2d897581f..9681a74e40 100644
--- a/tools/ocaml/libs/xc/xenctrl_stubs.c
+++ b/tools/ocaml/libs/xc/xenctrl_stubs.c
@@ -1019,7 +1019,7 @@ CAMLprim value stub_shadow_allocation_get(value xch, 
value domid)
 {
 CAMLparam2(xch, domid);
 CAMLlocal1(mb);
-unsigned int c_mb;
+unsigned int c_mb = 0;
 int ret;
 
 caml_enter_blocking_section();
@@ -1029,6 +1029,9 @@ CAMLprim value stub_shadow_allocation_get(value xch, 
value domid)
 caml_leave_blocking_section();
 if (ret != 0)
 failwith_xc(_H(xch));
+if ( !c_mb )
+caml_failwith("domctl returned uninitialized data for shadow "
+  "allocation, dying domain?");
 
 mb = Val_int(c_mb);
 CAMLreturn(mb);
-- 
2.34.1




[PATCH for-4.17 v3 13/15] tools/ocaml/xenstored: set uncaught exception handler

2022-11-08 Thread Edwin Török
Helps debug fatal errors during live update

Previously this would've just gone to /dev/null, because:
* daemonize reopens stderr as /dev/null
* systemd redirects stderr to /dev/null too

Previously the only way to debug this was to manually run oxenstored with
--no-fork, but when you have a fatal error and oxenstored just
disappears you'd want to know why.
There has been at least one observed instance of a bug where oxenstored
just disappeared inexplicably (it was believed to be due to an OOM exception).

Signed-off-by: Edwin Török 
---
Reason for inclusion in 4.17:
- avoids losing crucial information during a fatal error (e.g. during
  live update)

Changes since v2:
- new in v3
---
 tools/ocaml/xenstored/logging.ml   | 33 ++
 tools/ocaml/xenstored/xenstored.ml |  3 ++-
 2 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/tools/ocaml/xenstored/logging.ml b/tools/ocaml/xenstored/logging.ml
index 021ebc465b..cced038c48 100644
--- a/tools/ocaml/xenstored/logging.ml
+++ b/tools/ocaml/xenstored/logging.ml
@@ -342,3 +342,36 @@ let xb_answer ~tid ~con ~ty data =
 let watch_not_fired ~con perms path =
   let data = Printf.sprintf "EPERM perms=[%s] path=%s" perms path in
   access_logging ~tid:0 ~con ~data Watch_not_fired ~level:Info
+
+let print_flush msg =
+  prerr_endline msg;
+  flush stderr
+
+let msg_of exn bt =
+  Printf.sprintf "Fatal exception: %s\n%s\n" (Printexc.to_string exn)
+(Printexc.raw_backtrace_to_string bt)
+
+let fallback_exception_handler exn bt =
+  (* stderr goes to /dev/null, so use the logger where possible,
+ but always print to stderr too, in case everything else fails,
+ e.g. this can be used to debug with --no-fork
+
+ this function should try not to raise exceptions, but if it does
+ the ocaml runtime should still print the exception, both the original,
+ and the one from this function, but to stderr this time
+  *)
+  let msg = msg_of exn bt in
+  print_flush msg;
+  (* See Printexc.set_uncaught_exception_handler, need to flush,
+ so has to call stop and flush *)
+  match !xenstored_logger with
+  | Some l -> error "xenstored-fallback" "%s" msg; l.stop ()
+  | None ->
+(* Too early, no logger set yet.
+   We normally try to use the configured logger so we don't flood syslog
+   during development for example, or if the user has a file set
+*)
+try Syslog.log Syslog.Daemon Syslog.Err msg
+with e ->
+  let bt = Printexc.get_raw_backtrace () in
+  print_flush @@ msg_of e bt
diff --git a/tools/ocaml/xenstored/xenstored.ml 
b/tools/ocaml/xenstored/xenstored.ml
index 78177b116f..6828764f92 100644
--- a/tools/ocaml/xenstored/xenstored.ml
+++ b/tools/ocaml/xenstored/xenstored.ml
@@ -357,7 +357,8 @@ let tweak_gc () =
   Gc.set { (Gc.get ()) with Gc.max_overhead = !Define.gc_max_overhead }
 
 
-let _ =
+let () =
+  Printexc.set_uncaught_exception_handler Logging.fallback_exception_handler;
   let cf = do_argv in
   let pidfile =
 if Sys.file_exists (config_filename cf) then
-- 
2.34.1




[PATCH for-4.17 v3 09/15] tools/ocaml/xenstored/store.ml: fix build error

2022-11-08 Thread Edwin Török
Building with Dune in release mode fails with:
```
File "ocaml/xenstored/store.ml", line 464, characters 13-32:
Warning 18: this type-based record disambiguation is not principal.
File "ocaml/xenstored/store.ml", line 1:
Error: Some fatal warnings were triggered (1 occurrences)
```

This is a warning to help keep the code futureproof, quoting from its
documentation:
> Check information path during type-checking, to make sure that all types are
> derived in a principal way. When using labelled arguments and/or polymorphic
> methods, this flag is required to ensure future versions of the compiler will
> be able to infer types correctly, even if internal algorithms change. All
> programs accepted in -principal mode are also accepted in the default mode 
> with
> equivalent types, but different binary signatures, and this may slow down type
> checking; yet it is a good idea to use it once before publishing source code.

Fixes: db471408edd46 "tools/ocaml/xenstored: Fix quota bypass on domain 
shutdown"

Signed-off-by: Edwin Török 
---
Reason for inclusion in 4.17:
- fixes a build error in a previous commit that is already in master

Changes since v2:
- new in v3
---
 tools/ocaml/xenstored/store.ml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/ocaml/xenstored/store.ml b/tools/ocaml/xenstored/store.ml
index 14ec404988..38a4945372 100644
--- a/tools/ocaml/xenstored/store.ml
+++ b/tools/ocaml/xenstored/store.ml
@@ -461,7 +461,7 @@ let reset_permissions store domid =
   | Some perms ->
 if perms <> node.perms then
   Logging.debug "store|node" "Changed permissions for node %s" 
(Node.get_name node);
-Some { node with perms }
+Some { node with Node.perms }
 ) store.root
 
 type ops = {
-- 
2.34.1




[PATCH for-4.17 v3 01/15] tools/ocaml/libs/eventchn: do not leak event channels and OCaml 5.0 compat

2022-11-08 Thread Edwin Török
Add a finalizer on the event channel value, so that it calls
`xenevtchn_close` when the value would be GCed.

In practice oxenstored seems to be the only user of this,
and it creates a single global event channel only,
but freeing this could still be useful when run with OCAMLRUNPARAM=c

The code was previously casting a C pointer to an OCaml value,
which should be avoided: OCaml 5.0 won't support it.
(all "naked" C pointers must be wrapped inside an OCaml value,
 either an Abstract tag, or Nativeint, see the manual
 https://ocaml.org/manual/intfc.html#ss:c-outside-head)

Signed-off-by: Edwin Török 
Acked-by: Christian Lindig 
---
Reason for inclusion in 4.17:
- Fixes a runtime bug with upcoming OCaml 5.0 (already in beta)

Changes since v2:
- added Acked-by line
- add 4.17 reason
---
 tools/ocaml/libs/eventchn/xeneventchn_stubs.c | 29 +--
 1 file changed, 27 insertions(+), 2 deletions(-)

diff --git a/tools/ocaml/libs/eventchn/xeneventchn_stubs.c 
b/tools/ocaml/libs/eventchn/xeneventchn_stubs.c
index f889a7a2e4..67af116377 100644
--- a/tools/ocaml/libs/eventchn/xeneventchn_stubs.c
+++ b/tools/ocaml/libs/eventchn/xeneventchn_stubs.c
@@ -33,7 +33,30 @@
 #include 
 #include 
 
-#define _H(__h) ((xenevtchn_handle *)(__h))
+/* We want to close the event channel when it is no longer in use,
+   which can only be done safely with a finalizer.
+   Event channels are typically long lived, so we don't need tighter control 
over resource deallocation.
+   Use a custom block
+*/
+
+/* Access the xenevtchn_t* part of the OCaml custom block */
+#define _H(__h) (*((xenevtchn_handle**)Data_custom_val(__h)))
+
+static void stub_evtchn_finalize(value v)
+{
+   /* docs say to not use any CAMLparam* macros here */
+   xenevtchn_close(_H(v));
+}
+
+static struct custom_operations xenevtchn_ops = {
+   "xenevtchn",
+   stub_evtchn_finalize,
+   custom_compare_default, /* raises Failure, cannot compare */
+   custom_hash_default, /* ignored */
+   custom_serialize_default, /* raises Failure, can't serialize */
+   custom_deserialize_default, /* raises Failure, can't deserialize */
+   custom_compare_ext_default /* raises Failure */
+};
 
 CAMLprim value stub_eventchn_init(void)
 {
@@ -48,7 +71,9 @@ CAMLprim value stub_eventchn_init(void)
if (xce == NULL)
caml_failwith("open failed");
 
-   result = (value)xce;
+   /* contains file descriptors, trigger full GC at least every 128 
allocations */
+   result = caml_alloc_custom(&xenevtchn_ops, sizeof(xce), 0, 1);
+   _H(result) = xce;
CAMLreturn(result);
 }
 
-- 
2.34.1




[PATCH for-4.17 v3 04/15] tools/ocaml/xenstored/Makefile: use ocamldep -sort for linking order

2022-11-08 Thread Edwin Török
A better solution is being worked on for master,
but for now use ocamldep -sort instead of a manually established link
order.
The manually established link order will be wrong when (security)
patches introduce new dependencies between files that would require
changing the link order.

If dune was used as a build system this wouldn't be a problem, but we
can't use Dune yet due to OSSTest, which is stuck on Debian oldstable.

No functional change.

Signed-off-by: Edwin Török 
---
Reason for inclusion in 4.17:

Avoids having to put this patch as a prerequisite into a security update.
Earlier versions of XSA-326 needed this, and although the latest version
didn't, we don't know whether it might be needed again in the future or not.

Changes since v2:
- new in v3 (was previously emailed to security team though)
---
 tools/ocaml/xenstored/Makefile | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tools/ocaml/xenstored/Makefile b/tools/ocaml/xenstored/Makefile
index 6f7333926e..e8aaecf2e6 100644
--- a/tools/ocaml/xenstored/Makefile
+++ b/tools/ocaml/xenstored/Makefile
@@ -72,7 +72,11 @@ XENSTOREDLIBS = \
 PROGRAMS = oxenstored
 
 oxenstored_LIBS = $(XENSTOREDLIBS)
-oxenstored_OBJS = $(OBJS)
+# use ocamldep to figure out link order, otherwise the Makefile would have
+# to be continously adjusted for security patches that introduce new
+# dependencies between files
+oxenstored_MLSORTED = $(shell $(OCAMLDEP) -sort $(OBJS:=.ml))
+oxenstored_OBJS = $(oxenstored_MLSORTED:.ml=)
 
 OCAML_PROGRAM = oxenstored
 
-- 
2.34.1




[PATCH for-4.17 v3 00/15] OCaml fixes for Xen 4.17

2022-11-08 Thread Edwin Török
These are the patches that I have outstanding for Xen 4.17.
I have included a reason why I'm requesting them to be included in 4.17
after the --- line in each individual patch, see also a summary below.

For convenience the patches are also available in a git repo:
```
git remote add edwintorok https://github.com/edwintorok/xen.git
git fetch edwintorok private/edvint/for-4.17
git log -p origin/master..private/edvint/for-4.17
```
And viewable with a browser too:
https://github.com/edwintorok/xen/compare/private/edvint/for-4.17

* 3 patches related to OCaml 5 support
https://patchwork.kernel.org/project/xen-devel/list/?series=680975
These have already been posted to the list previously, but not
yet committed to master (I probably didn't use the correct subject and
CC line for patches meant for 4.17, I think I've fixed that now)

* Makefile.rules followup
Also part of https://patchwork.kernel.org/project/xen-devel/list/?series=680975
these address some review feedback that I received after patches got
committed

* oxenstored live update bugfixes
Testing of oxenstored live update has revealed some bugs (some of which
got discovered on the C side too and fixed during one of the previous
XSAs, but unfortunately none of that discussion is public, and we've
ended up rediscovering the issue in the OCaml implementation too,
which reminded me of the XSA discussions at the time).
This brings the OCaml live update handling of event channels closer to
the C xenstored version.
It also fixes a few more bugs regarding logging and exception handling
during live update, and during out of memory situations (theoretical now
after XSA-326 fix).

* a bugfix for a xenctrl binding
Xen returns uninitialized data as part of a paging op domctl when a
domain is dying. Work around this in the C stub by always initializing the
domctl arguments to detect this.
Xen fix in hypervisor side will be done separately, but even then having
this is useful defensive coding.
This is a 9 year old bug that still happens today, I've encountered it
while testing this very series, hence the inclusion here.

I expect most of these to be straightforward bugfixes; the only one
slightly controversial might be the indentation one: changing tabs to
spaces to match Xen coding style.

I was unsure whether to include it here,
but I think it is best to have it in 4.17 to simplify future
(security) backports from master to 4.17, and avoid having to deal with
whitespace issues all the time when writing patches.
The code here used a style that was different from Xen's, and also
different from every other piece of code that I work on, and OCaml indentation
tools also only support spaces, not tabs, so there really is no reason
to keep the code as is (initially I thought it uses tabs to follow Xen
style, but after reading CODING_STYLE I realized that is not true).
It is very easy to verify that the patch changes nothing with `git diff
-w`, or `git log -p -1`.

Edwin Török (15):
  tools/ocaml/libs/eventchn: do not leak event channels and OCaml 5.0
compat
  tools/ocaml/libs/xc: OCaml 5.0 compatibility
  tools/ocaml/libs/{xb, mmap}: use Data_abstract_val wrapper
  tools/ocaml/xenstored/Makefile: use ocamldep -sort for linking order
  tools/ocaml/Makefile.rules: do not run ocamldep on distclean
  tools/ocaml/Makefile.rules: hide -include on *clean
  CODING_STYLE(tools/ocaml): add 'make format' and remove tabs
  tools/ocaml/libs/evtchn: add xenevtchn_fdopen bindings
  tools/ocaml/xenstored/store.ml: fix build error
  tools/ocaml/xenstored: keep eventchn FD open across live update
  tools/ocaml/xenstored: do not rebind event channels after live update
  tools/ocaml/xenstored: log live update issues at warning level
  tools/ocaml/xenstored: set uncaught exception handler
  tools/ocaml/xenstored/syslog_stubs.c: avoid potential NULL dereference
  tools/ocaml/libs/xc: fix use of uninitialized memory in
shadow_allocation_get

 tools/ocaml/Makefile  |5 +
 tools/ocaml/Makefile.rules|4 +-
 tools/ocaml/libs/eventchn/xeneventchn.ml  |   11 +-
 tools/ocaml/libs/eventchn/xeneventchn.mli |   14 +-
 tools/ocaml/libs/eventchn/xeneventchn_stubs.c |  199 +-
 tools/ocaml/libs/mmap/mmap_stubs.h|9 +-
 tools/ocaml/libs/mmap/xenmmap.ml  |2 +-
 tools/ocaml/libs/mmap/xenmmap.mli |4 +-
 tools/ocaml/libs/mmap/xenmmap_stubs.c |  114 +-
 tools/ocaml/libs/xb/op.ml |   76 +-
 tools/ocaml/libs/xb/packet.ml |   30 +-
 tools/ocaml/libs/xb/partial.ml|   48 +-
 tools/ocaml/libs/xb/xb.ml |  422 ++--
 tools/ocaml/libs/xb/xb.mli|  106 +-
 tools/ocaml/libs/xb/xenbus_stubs.c|   50 +-
 tools/ocaml/libs/xb/xs_ring.ml|   28 +-
 tools/ocaml/libs/xb/xs_ring_stubs.c   |  216 +-
 tools/ocaml/libs/xc/abi-check |2 +-
 tools/ocaml/libs/xc/xenctrl.ml

[PATCH for-4.17 v3 08/15] tools/ocaml/libs/evtchn: add xenevtchn_fdopen bindings

2022-11-08 Thread Edwin Török
Signed-off-by: Edwin Török 
---
Reason for inclusion in 4.17:
- needed for a bugfix in a followup commit

Changes since v2:
- new in v3
---
 tools/ocaml/libs/eventchn/xeneventchn.ml  |  6 +++-
 tools/ocaml/libs/eventchn/xeneventchn.mli |  9 +-
 tools/ocaml/libs/eventchn/xeneventchn_stubs.c | 28 +--
 3 files changed, 38 insertions(+), 5 deletions(-)

diff --git a/tools/ocaml/libs/eventchn/xeneventchn.ml 
b/tools/ocaml/libs/eventchn/xeneventchn.ml
index dd00a1f0ea..34f7391f5e 100644
--- a/tools/ocaml/libs/eventchn/xeneventchn.ml
+++ b/tools/ocaml/libs/eventchn/xeneventchn.ml
@@ -16,7 +16,11 @@
 
 type handle
 
-external init: unit -> handle = "stub_eventchn_init"
+external init_cloexec: bool -> handle = "stub_eventchn_init"
+
+let init ?(cloexec=true) () = init_cloexec cloexec
+
+external fdopen: Unix.file_descr -> handle = "stub_eventchn_fdopen"
 external fd: handle -> Unix.file_descr = "stub_eventchn_fd"
 
 type t = int
diff --git a/tools/ocaml/libs/eventchn/xeneventchn.mli 
b/tools/ocaml/libs/eventchn/xeneventchn.mli
index 08c7337643..3965b29621 100644
--- a/tools/ocaml/libs/eventchn/xeneventchn.mli
+++ b/tools/ocaml/libs/eventchn/xeneventchn.mli
@@ -43,7 +43,14 @@ val to_int: t -> int
 
 val of_int: int -> t
 
-val init: unit -> handle
+val init: ?cloexec:bool -> unit -> handle
+(** [init ?cloexec ()]
+Return an initialised event channel interface.
+The default is to close the underlying file descriptor
+on [execve], which can be overriden with [~cloexec:false].
+On error it will throw a Failure exception. *)
+
+val fdopen: Unix.file_descr -> handle
 (** Return an initialised event channel interface. On error it
 will throw a Failure exception. *)
 
diff --git a/tools/ocaml/libs/eventchn/xeneventchn_stubs.c 
b/tools/ocaml/libs/eventchn/xeneventchn_stubs.c
index 2263c4caa1..92092ca31e 100644
--- a/tools/ocaml/libs/eventchn/xeneventchn_stubs.c
+++ b/tools/ocaml/libs/eventchn/xeneventchn_stubs.c
@@ -58,14 +58,36 @@ static struct custom_operations xenevtchn_ops = {
 custom_compare_ext_default /* raises Failure */
 };
 
-CAMLprim value stub_eventchn_init(void)
+CAMLprim value stub_eventchn_init(value cloexec)
 {
-CAMLparam0();
+CAMLparam1(cloexec);
 CAMLlocal1(result);
 xenevtchn_handle *xce;
 
 caml_enter_blocking_section();
-xce = xenevtchn_open(NULL, 0);
+xce = xenevtchn_open(NULL, Bool_val(cloexec) ? 0 : XENEVTCHN_NO_CLOEXEC);
+caml_leave_blocking_section();
+
+if ( xce == NULL )
+caml_failwith("open failed");
+
+/* contains file descriptors, trigger full GC at least every 128
+ * allocations
+ */
+result = caml_alloc_custom(&xenevtchn_ops, sizeof(xce), 0, 1);
+_H(result) = xce;
+CAMLreturn(result);
+}
+
+CAMLprim value stub_eventchn_fdopen(value fdval)
+{
+CAMLparam1(fdval);
+CAMLlocal1(result);
+xenevtchn_handle *xce;
+
+caml_enter_blocking_section();
+/* having any flags here would raise EINVAL */
+xce = xenevtchn_fdopen(NULL, Int_val(fdval), 0);
 caml_leave_blocking_section();
 
 if (xce == NULL)
-- 
2.34.1




[PATCH for-4.17 v3 03/15] tools/ocaml/libs/{xb, mmap}: use Data_abstract_val wrapper

2022-11-08 Thread Edwin Török
This is not strictly necessary since it is essentially a no-op
currently: a cast to void* and value*, even in OCaml 5.0.

However it does make it clearer that what we have here is not a regular
OCaml value, but one allocated with Abstract_tag or Custom_tag,
and follows the example from the manual more closely:
https://v2.ocaml.org/manual/intfc.html#ss:c-outside-head

It also makes it clearer that these modules have been reviewed for
compat with OCaml 5.0.

No functional change.

Signed-off-by: Edwin Török 
Acked-by: Christian Lindig 
---
Reason for inclusion in 4.17:
- make code follow best practice for upcoming OCaml 5.0 compiler (already in 
beta)

Changes since v2:
- add Acked-by line
---
 tools/ocaml/libs/mmap/xenmmap_stubs.c | 2 +-
 tools/ocaml/libs/xb/xs_ring_stubs.c   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/ocaml/libs/mmap/xenmmap_stubs.c 
b/tools/ocaml/libs/mmap/xenmmap_stubs.c
index e2ce088e25..141dedb78c 100644
--- a/tools/ocaml/libs/mmap/xenmmap_stubs.c
+++ b/tools/ocaml/libs/mmap/xenmmap_stubs.c
@@ -28,7 +28,7 @@
 #include 
 #include 
 
-#define Intf_val(a) ((struct mmap_interface *) a)
+#define Intf_val(a) ((struct mmap_interface *) Data_abstract_val(a))
 
 static int mmap_interface_init(struct mmap_interface *intf,
int fd, int pflag, int mflag,
diff --git a/tools/ocaml/libs/xb/xs_ring_stubs.c 
b/tools/ocaml/libs/xb/xs_ring_stubs.c
index 7a91fdee75..cc9114029f 100644
--- a/tools/ocaml/libs/xb/xs_ring_stubs.c
+++ b/tools/ocaml/libs/xb/xs_ring_stubs.c
@@ -35,7 +35,7 @@
 #include 
 #include "mmap_stubs.h"
 
-#define GET_C_STRUCT(a) ((struct mmap_interface *) a)
+#define GET_C_STRUCT(a) ((struct mmap_interface *) Data_abstract_val(a))
 
 /*
  * Bytes_val has been introduced by Ocaml 4.06.1. So define our own version
-- 
2.34.1




[PATCH for-4.17 v3 02/15] tools/ocaml/libs/xc: OCaml 5.0 compatibility

2022-11-08 Thread Edwin Török
Follow the manual to avoid naked pointers:
https://v2.ocaml.org/manual/intfc.html#ss:c-outside-head

No functional change, except on OCaml 5.0 where it is a bugfix.

Signed-off-by: Edwin Török 
Acked-by: Christian Lindig 
---
Reason for inclusion in 4.17:
- bugfix for upcoming OCaml 5.0 compiler (already in beta)

Changes since v2:
- add Acked-by line
---
 tools/ocaml/libs/mmap/mmap_stubs.h  |  5 +
 tools/ocaml/libs/xc/xenctrl_stubs.c | 11 ++-
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/tools/ocaml/libs/mmap/mmap_stubs.h 
b/tools/ocaml/libs/mmap/mmap_stubs.h
index 65e4239890..5c65cc86fb 100644
--- a/tools/ocaml/libs/mmap/mmap_stubs.h
+++ b/tools/ocaml/libs/mmap/mmap_stubs.h
@@ -30,4 +30,9 @@ struct mmap_interface
int len;
 };
 
+/* for compatibility with OCaml 4.02.3 */
+#ifndef Data_abstract_val
+#define Data_abstract_val(v) ((void*) Op_val(v))
+#endif
+
 #endif
diff --git a/tools/ocaml/libs/xc/xenctrl_stubs.c 
b/tools/ocaml/libs/xc/xenctrl_stubs.c
index a8789d19be..8cd11060ec 100644
--- a/tools/ocaml/libs/xc/xenctrl_stubs.c
+++ b/tools/ocaml/libs/xc/xenctrl_stubs.c
@@ -37,7 +37,7 @@
 
 #include "mmap_stubs.h"
 
-#define _H(__h) ((xc_interface *)(__h))
+#define _H(__h) *((xc_interface **) Data_abstract_val(__h))
 #define _D(__d) ((uint32_t)Int_val(__d))
 
 #ifndef Val_none
@@ -70,14 +70,15 @@ static void Noreturn failwith_xc(xc_interface *xch)
 CAMLprim value stub_xc_interface_open(void)
 {
CAMLparam0();
-xc_interface *xch;
+   CAMLlocal1(result);
 
+   result = caml_alloc(1, Abstract_tag);
/* Don't assert XC_OPENFLAG_NON_REENTRANT because these bindings
 * do not prevent re-entrancy to libxc */
-xch = xc_interface_open(NULL, NULL, 0);
-if (xch == NULL)
+   _H(result) = xc_interface_open(NULL, NULL, 0);
+   if (_H(result) == NULL)
failwith_xc(NULL);
-CAMLreturn((value)xch);
+   CAMLreturn(result);
 }
 
 
-- 
2.34.1




[PATCH for-4.17 v3 05/15] tools/ocaml/Makefile.rules: do not run ocamldep on distclean

2022-11-08 Thread Edwin Török
Fixes: 2f2b76d47c5bcd9 ("tools/ocaml: do not run ocamldep during make clean")

Signed-off-by: Edwin Török 
---
Reason for inclusion in 4.17:
- addresses review comments received post commit

Changes since v2:
- add Fixes line
- add 4.17 reason
---
 tools/ocaml/Makefile.rules | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/ocaml/Makefile.rules b/tools/ocaml/Makefile.rules
index 0d3c6ac839..e0b9de34e4 100644
--- a/tools/ocaml/Makefile.rules
+++ b/tools/ocaml/Makefile.rules
@@ -44,7 +44,7 @@ META: META.in
 
 ALL_OCAML_OBJ_SOURCES=$(addsuffix .ml, $(ALL_OCAML_OBJS))
 
-ifneq ($(MAKECMDGOALS),clean)
+ifeq (,$(findstring clean,$(MAKECMDGOALS)))
 .ocamldep.make: $(ALL_OCAML_OBJ_SOURCES) Makefile 
$(OCAML_TOPLEVEL)/Makefile.rules
$(call quiet-command, $(OCAMLDEP) $(ALL_OCAML_OBJ_SOURCES) *.mli 
$o,MLDEP,)
 endif
-- 
2.34.1




[PATCH for-4.17 v3 06/15] tools/ocaml/Makefile.rules: hide -include on *clean

2022-11-08 Thread Edwin Török
Fixes: 2f2b76d47c5bcd9 ("tools/ocaml: do not run ocamldep during make clean")

Signed-off-by: Edwin Török 
---
Reason for inclusion in 4.17:
- addresses review comments received post commit

Changes since v2:
- add Fixes line
- add 4.17 reason
---
 tools/ocaml/Makefile.rules | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/ocaml/Makefile.rules b/tools/ocaml/Makefile.rules
index e0b9de34e4..39ac260a4d 100644
--- a/tools/ocaml/Makefile.rules
+++ b/tools/ocaml/Makefile.rules
@@ -44,10 +44,8 @@ META: META.in
 
 ALL_OCAML_OBJ_SOURCES=$(addsuffix .ml, $(ALL_OCAML_OBJS))
 
-ifeq (,$(findstring clean,$(MAKECMDGOALS)))
 .ocamldep.make: $(ALL_OCAML_OBJ_SOURCES) Makefile 
$(OCAML_TOPLEVEL)/Makefile.rules
$(call quiet-command, $(OCAMLDEP) $(ALL_OCAML_OBJ_SOURCES) *.mli 
$o,MLDEP,)
-endif
 
 clean: $(CLEAN_HOOKS)
$(Q)rm -f .*.d *.o *.so *.a *.cmo *.cmi *.cma *.cmx *.cmxa *.annot 
*.spot *.spit $(LIBS) $(PROGRAMS) $(GENERATED_FILES) .ocamldep.make META
@@ -94,7 +92,9 @@ define C_PROGRAM_template
$(call quiet-command, $(CC) $(LDFLAGS) -o $$@ $$+,BIN,$$@)
 endef
 
+ifeq (,$(findstring clean,$(MAKECMDGOALS)))
 -include .ocamldep.make
+endif
 
 $(foreach lib,$(OCAML_LIBRARY),$(eval $(call OCAML_LIBRARY_template,$(lib
 $(foreach lib,$(OCAML_NOC_LIBRARY),$(eval $(call 
OCAML_NOC_LIBRARY_template,$(lib
-- 
2.34.1




[[PATCH for-4.17 v1]] tools/ocaml/xenstored/store.ml: fix build error

2022-11-03 Thread Edwin Török
Building with Dune in release mode fails with:
```
File "ocaml/xenstored/store.ml", line 464, characters 13-32:
Warning 18: this type-based record disambiguation is not principal.
File "ocaml/xenstored/store.ml", line 1:
Error: Some fatal warnings were triggered (1 occurrences)
```

This is a warning to help keep the code futureproof, quoting from its
documentation:
> Check information path during type-checking, to make sure that all types are
> derived in a principal way. When using labelled arguments and/or polymorphic
> methods, this flag is required to ensure future versions of the compiler will
> be able to infer types correctly, even if internal algorithms change. All
> programs accepted in -principal mode are also accepted in the default mode 
> with
> equivalent types, but different binary signatures, and this may slow down type
> checking; yet it is a good idea to use it once before publishing source code.

Fixes: db471408edd46 "tools/ocaml/xenstored: Fix quota bypass on domain 
shutdown"

Signed-off-by: Edwin Török 
---
 tools/ocaml/xenstored/store.ml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/ocaml/xenstored/store.ml b/tools/ocaml/xenstored/store.ml
index 14ec404988..38a4945372 100644
--- a/tools/ocaml/xenstored/store.ml
+++ b/tools/ocaml/xenstored/store.ml
@@ -461,7 +461,7 @@ let reset_permissions store domid =
   | Some perms ->
 if perms <> node.perms then
   Logging.debug "store|node" "Changed permissions for node %s" 
(Node.get_name node);
-Some { node with perms }
+Some { node with Node.perms }
 ) store.root
 
 type ops = {
-- 
2.34.1




[PATCH for-4.17 v1 0/2] xenctrl.ml: improve scalability of domain_getinfolist

2022-11-01 Thread Edwin Török
Pau has performed some performance tests by booting 1000 Mirage
unikernel test VMs and shutting them down.
We've noticed on the flamegraphs that a lot of time is spent in Xenctrl
`domain_getinfolist`, 17.7% of overall time
(This needs to be multiplied by 16 because Dom0 100% usage = 16 vCPUs)
In particular time is spent in camlXenctrl___getlist_339
as can be seen from this flamegraph, and it also creates a very deep
call stack:
https://cdn.jsdelivr.net/gh/edwintorok/xen@xenctrl-coverletter/docs/tmp/perf-merge-boot.svg?x=948.9&y=2213

After some algorithmic improvements to the code now the function barely
shows up at all on a flamegraph, taking only 0.02%.
The function is called camlXenctrl___getlist_343, but that is just due
to the changed arguments, still the same function:
https://cdn.jsdelivr.net/gh/edwintorok/xen@xenctrl-coverletter/docs/tmp/perf-xen-boot-1150.svg?x=1188.0&y=1941&s=infolist

It was calling the Xen hypercall ~500*1000 times for 1000 VMs, and
instead it is now calling it "only" 1000 times.

I would suggest to try to take this in 4.17 given the massive
improvement in scalability (number of VMs on a Xen host).

There are further improvements possible here, but they'll be in xenopsd
(part of XAPI) to avoid calling domain_getinfolist and just use
domain_getinfo: the only reason it needs to use infolist is that it does
the lookup by VM UUID and not by domid, but it could have a small cache
of UUID->domid mappings and then call just domain_getinfo (or get the
mapping from xenstore if not in the cache), but it looks like that
improvement is not even needed if this function barely registers on a
flamegraph now.

P.S.: the mirage test VM is a very old PV version, at some point we'll
repeat the test with a Solo5 based PVH one.

Edwin Török (2):
  xenctrl.ml: make domain_getinfolist tail recursive
  xenctrl: use larger chunksize in domain_getinfolist

 tools/ocaml/libs/xc/xenctrl.ml | 25 ++---
 1 file changed, 18 insertions(+), 7 deletions(-)

-- 
2.34.1




[PATCH for-4.17 v1 2/2] xenctrl: use larger chunksize in domain_getinfolist

2022-11-01 Thread Edwin Török
The support limit of XAPI is 1000, so using 1024 will very likely get
everything in one go.
Other code in Xen also uses chunk sizes of 256 or 1024, and either would be
much better than 2, especially now that list construction is more
efficient.

Xenopsd should also use `domain_getinfo` instead of `domain_getinfolist`
in a lot of places where info list is used, but that is another
optimization.

Signed-off-by: Edwin Török 
Tested-by: Pau Ruiz Safont 
---
 tools/ocaml/libs/xc/xenctrl.ml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/ocaml/libs/xc/xenctrl.ml b/tools/ocaml/libs/xc/xenctrl.ml
index 3ebedffdc7..a56539ff2c 100644
--- a/tools/ocaml/libs/xc/xenctrl.ml
+++ b/tools/ocaml/libs/xc/xenctrl.ml
@@ -237,7 +237,7 @@ let rev_append_fold acc e = List.rev_append e acc
 let rev_concat lst = List.fold_left rev_append_fold [] lst
 
 let domain_getinfolist handle first_domain =
-   let nb = 2 in
+   let nb = 1024 in
let rec __getlist lst from =
(* _domain_getinfolist returns domains in reverse order, 
largest first *)
match _domain_getinfolist handle from nb with
-- 
2.34.1




[PATCH for-4.17 v1 1/2] xenctrl.ml: make domain_getinfolist tail recursive

2022-11-01 Thread Edwin Török
On a host with ~1000 VMs (the support limit for XAPI) domain_getinfolist
took O(n^2) time (n=number of domains).
This, coupled with xenopsd making inefficient calls to
domain_getinfolist (1 call/VM), resulted in visibly bad performance of
XAPI.

It was calling the Xen domaininfolist hypercall N/2 times.
Optimize this such that it is called at most 2 times during normal use.

Implement a tail recursive `rev_concat` equivalent to `concat |> rev`,
and use it instead of calling `@` multiple times.

The added benefit is that the list of domains should now actually be in
increasing order instead of having pairs of 2 changing direction every time.

Signed-off-by: Edwin Török 
Tested-by: Pau Ruiz Safont 
---
 tools/ocaml/libs/xc/xenctrl.ml | 23 +--
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/tools/ocaml/libs/xc/xenctrl.ml b/tools/ocaml/libs/xc/xenctrl.ml
index 28ed642231..3ebedffdc7 100644
--- a/tools/ocaml/libs/xc/xenctrl.ml
+++ b/tools/ocaml/libs/xc/xenctrl.ml
@@ -226,14 +226,25 @@ external domain_shutdown: handle -> domid -> 
shutdown_reason -> unit
 external _domain_getinfolist: handle -> domid -> int -> domaininfo list
= "stub_xc_domain_getinfolist"
 
+let rev_append_fold acc e = List.rev_append e acc
+
+(**
+   [rev_concat lst] is equivalent to [lst |> List.concat |> List.rev]
+   except it is tail recursive, whereas [List.concat] isn't.
+   Example:
+   rev_concat [[10;9;8];[7;6];[5]]] = [5; 6; 7; 8; 9; 10]
+*)
+let rev_concat lst = List.fold_left rev_append_fold [] lst
+
 let domain_getinfolist handle first_domain =
let nb = 2 in
-   let last_domid l = (List.hd l).domid + 1 in
-   let rec __getlist from =
-   let l = _domain_getinfolist handle from nb in
-   (if List.length l = nb then __getlist (last_domid l) else []) @ 
l
-   in
-   List.rev (__getlist first_domain)
+   let rec __getlist lst from =
+   (* _domain_getinfolist returns domains in reverse order, 
largest first *)
+   match _domain_getinfolist handle from nb with
+   | [] -> rev_concat lst
+   | (hd :: _) as l -> __getlist (l :: lst) (hd.domid + 1)
+   in
+   __getlist [] first_domain
 
 external domain_getinfo: handle -> domid -> domaininfo= 
"stub_xc_domain_getinfo"
 
-- 
2.34.1




[PATCH for-4.17 v2] tools/ocaml/xenstored: fix live update exception

2022-10-21 Thread Edwin Török
During live update we will load the /tool/xenstored path from the previous 
binary,
and then try to mkdir /tool again which will fail with EEXIST.
Check for existence of the path before creating it.

The write call to /tool/xenstored should not need any changes
(and we do want to overwrite any previous path, in case it changed).

Prior to 7110192b1df6 live update would work only if the binary path was
specified, and with 7110192b1df6 and this patch, live update also works when
no binary path is specified in `xenstore-control live-update`.

Fixes: 7110192b1df6 ("tools/oxenstored: Fix Oxenstored Live Update")
Signed-off-by: Edwin Török 
Acked-by: Christian Lindig 
Release-acked-by: Henry Wang 
---
 tools/ocaml/xenstored/xenstored.ml | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tools/ocaml/xenstored/xenstored.ml 
b/tools/ocaml/xenstored/xenstored.ml
index fc90fcdeb5..acc7290627 100644
--- a/tools/ocaml/xenstored/xenstored.ml
+++ b/tools/ocaml/xenstored/xenstored.ml
@@ -353,7 +353,9 @@ let _ =
) in
 
(* required for xenstore-control to detect availability of live-update 
*)
-   Store.mkdir store Perms.Connection.full_rights (Store.Path.of_string 
"/tool");
+   let tool_path = Store.Path.of_string "/tool" in
+   if not (Store.path_exists store tool_path) then
+   Store.mkdir store Perms.Connection.full_rights tool_path;
Store.write store Perms.Connection.full_rights
(Store.Path.of_string "/tool/xenstored") Sys.executable_name;
 

base-commit: 0c06760be3dc3f286015e18c4b1d1694e55da026
-- 
2.34.1




[PATCH for-4.17] tools/ocaml/xenstored: fix live update exception

2022-10-20 Thread Edwin Török
During live update we will load the /tool/xenstored path from the previous 
binary,
and then try to mkdir /tool again which will fail with EEXIST.
Check for existence of the path before creating it.

The write call to /tool/xenstored should not need any changes
(and we do want to overwrite any previous path, in case it changed).

Prior to 7110192b1df6, live update would work only if the binary path was
specified; with 7110192b1df6 and this patch, live update also works when
no binary path is specified in `xenstore-control live-update`.

Fixes: 7110192b1df6 ("tools/oxenstored: Fix Oxenstored Live Update")
Signed-off-by: Edwin Török 
---
 tools/ocaml/xenstored/xenstored.ml | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/tools/ocaml/xenstored/xenstored.ml 
b/tools/ocaml/xenstored/xenstored.ml
index fc90fcdeb5..3299fe73f7 100644
--- a/tools/ocaml/xenstored/xenstored.ml
+++ b/tools/ocaml/xenstored/xenstored.ml
@@ -353,7 +353,9 @@ let _ =
) in
 
(* required for xenstore-control to detect availability of live-update 
*)
-   Store.mkdir store Perms.Connection.full_rights (Store.Path.of_string 
"/tool");
+   let tool_path = Store.Path.of_string "/tool" in
+   if not (Store.path_exists store tool_path) then
+   Store.mkdir store 
Perms.Connection.full_rights tool_path;
Store.write store Perms.Connection.full_rights
(Store.Path.of_string "/tool/xenstored") Sys.executable_name;
 

base-commit: 0c06760be3dc3f286015e18c4b1d1694e55da026
-- 
2.34.1




[PATCH v2 1/5] tools/ocaml/Makefile.rules: do not run ocamldep on distclean

2022-09-27 Thread Edwin Török
Signed-off-by: Edwin Török 
---
 tools/ocaml/Makefile.rules | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/ocaml/Makefile.rules b/tools/ocaml/Makefile.rules
index 0d3c6ac839..e0b9de34e4 100644
--- a/tools/ocaml/Makefile.rules
+++ b/tools/ocaml/Makefile.rules
@@ -44,7 +44,7 @@ META: META.in
 
 ALL_OCAML_OBJ_SOURCES=$(addsuffix .ml, $(ALL_OCAML_OBJS))
 
-ifneq ($(MAKECMDGOALS),clean)
+ifeq (,$(findstring clean,$(MAKECMDGOALS)))
 .ocamldep.make: $(ALL_OCAML_OBJ_SOURCES) Makefile 
$(OCAML_TOPLEVEL)/Makefile.rules
$(call quiet-command, $(OCAMLDEP) $(ALL_OCAML_OBJ_SOURCES) *.mli 
$o,MLDEP,)
 endif
-- 
2.34.1




[PATCH v2 2/5] tools/ocaml/Makefile.rules: hide -include on *clean

2022-09-27 Thread Edwin Török
Signed-off-by: Edwin Török 
---
 tools/ocaml/Makefile.rules | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/ocaml/Makefile.rules b/tools/ocaml/Makefile.rules
index e0b9de34e4..39ac260a4d 100644
--- a/tools/ocaml/Makefile.rules
+++ b/tools/ocaml/Makefile.rules
@@ -44,10 +44,8 @@ META: META.in
 
 ALL_OCAML_OBJ_SOURCES=$(addsuffix .ml, $(ALL_OCAML_OBJS))
 
-ifeq (,$(findstring clean,$(MAKECMDGOALS)))
 .ocamldep.make: $(ALL_OCAML_OBJ_SOURCES) Makefile 
$(OCAML_TOPLEVEL)/Makefile.rules
$(call quiet-command, $(OCAMLDEP) $(ALL_OCAML_OBJ_SOURCES) *.mli 
$o,MLDEP,)
-endif
 
 clean: $(CLEAN_HOOKS)
$(Q)rm -f .*.d *.o *.so *.a *.cmo *.cmi *.cma *.cmx *.cmxa *.annot 
*.spot *.spit $(LIBS) $(PROGRAMS) $(GENERATED_FILES) .ocamldep.make META
@@ -94,7 +92,9 @@ define C_PROGRAM_template
$(call quiet-command, $(CC) $(LDFLAGS) -o $$@ $$+,BIN,$$@)
 endef
 
+ifeq (,$(findstring clean,$(MAKECMDGOALS)))
 -include .ocamldep.make
+endif
 
 $(foreach lib,$(OCAML_LIBRARY),$(eval $(call OCAML_LIBRARY_template,$(lib))))
 $(foreach lib,$(OCAML_NOC_LIBRARY),$(eval $(call 
OCAML_NOC_LIBRARY_template,$(lib))))
-- 
2.34.1




[PATCH v2 4/5] tools/ocaml/libs/xc: OCaml 5.0 compatibility

2022-09-27 Thread Edwin Török
Follow the manual to avoid naked pointers:
https://v2.ocaml.org/manual/intfc.html#ss:c-outside-head

No functional change, except on OCaml 5.0 where it is a bugfix.

Signed-off-by: Edwin Török 
---
 tools/ocaml/libs/xc/xenctrl_stubs.c | 11 ++-
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/tools/ocaml/libs/xc/xenctrl_stubs.c 
b/tools/ocaml/libs/xc/xenctrl_stubs.c
index 19335bdf45..7ff4e00314 100644
--- a/tools/ocaml/libs/xc/xenctrl_stubs.c
+++ b/tools/ocaml/libs/xc/xenctrl_stubs.c
@@ -37,7 +37,7 @@
 
 #include "mmap_stubs.h"
 
-#define _H(__h) ((xc_interface *)(__h))
+#define _H(__h) *((xc_interface **) Data_abstract_val(__h))
 #define _D(__d) ((uint32_t)Int_val(__d))
 
 #ifndef Val_none
@@ -70,14 +70,15 @@ static void Noreturn failwith_xc(xc_interface *xch)
 CAMLprim value stub_xc_interface_open(void)
 {
CAMLparam0();
-xc_interface *xch;
+   CAMLlocal1(result);
 
+   result = caml_alloc(1, Abstract_tag);
/* Don't assert XC_OPENFLAG_NON_REENTRANT because these bindings
 * do not prevent re-entrancy to libxc */
-xch = xc_interface_open(NULL, NULL, 0);
-if (xch == NULL)
+   _H(result) = xc_interface_open(NULL, NULL, 0);
+   if (_H(result) == NULL)
failwith_xc(NULL);
-CAMLreturn((value)xch);
+   CAMLreturn(result);
 }
 
 
-- 
2.34.1




[PATCH v2 3/5] tools/ocaml/libs/eventchn: do not leak event channels and OCaml 5.0 compat

2022-09-27 Thread Edwin Török
Add a finalizer on the event channel value, so that it calls
`xenevtchn_close` when the value would be GCed.

In practice oxenstored seems to be the only user of this,
and it creates a single global event channel only,
but freeing this could still be useful when run with OCAMLRUNPARAM=c

The code was previously casting a C pointer to an OCaml value,
which should be avoided: OCaml 5.0 won't support it.
(all "naked" C pointers must be wrapped inside an OCaml value,
 either an Abstract tag, or Nativeint, see the manual
 https://ocaml.org/manual/intfc.html#ss:c-outside-head)

Signed-off-by: Edwin Török 
Acked-by: Christian Lindig 
---
 tools/ocaml/libs/eventchn/xeneventchn_stubs.c | 29 +--
 1 file changed, 27 insertions(+), 2 deletions(-)

diff --git a/tools/ocaml/libs/eventchn/xeneventchn_stubs.c 
b/tools/ocaml/libs/eventchn/xeneventchn_stubs.c
index f889a7a2e4..67af116377 100644
--- a/tools/ocaml/libs/eventchn/xeneventchn_stubs.c
+++ b/tools/ocaml/libs/eventchn/xeneventchn_stubs.c
@@ -33,7 +33,30 @@
 #include 
 #include 
 
-#define _H(__h) ((xenevtchn_handle *)(__h))
+/* We want to close the event channel when it is no longer in use,
+   which can only be done safely with a finalizer.
+   Event channels are typically long lived, so we don't need tighter control 
over resource deallocation.
+   Use a custom block
+*/
+
+/* Access the xenevtchn_handle* stored in the OCaml custom block */
+#define _H(__h) (*((xenevtchn_handle**)Data_custom_val(__h)))
+
+static void stub_evtchn_finalize(value v)
+{
+   /* docs say to not use any CAMLparam* macros here */
+   xenevtchn_close(_H(v));
+}
+
+static struct custom_operations xenevtchn_ops = {
+   "xenevtchn",
+   stub_evtchn_finalize,
+   custom_compare_default, /* raises Failure, cannot compare */
+   custom_hash_default, /* ignored */
+   custom_serialize_default, /* raises Failure, can't serialize */
+   custom_deserialize_default, /* raises Failure, can't deserialize */
+   custom_compare_ext_default /* raises Failure */
+};
 
 CAMLprim value stub_eventchn_init(void)
 {
@@ -48,7 +71,9 @@ CAMLprim value stub_eventchn_init(void)
if (xce == NULL)
caml_failwith("open failed");
 
-   result = (value)xce;
+   /* contains file descriptors; mem = 0 and max = 1 so that this value 
(a singleton anyway) does not speed up the GC */
+   result = caml_alloc_custom(&xenevtchn_ops, sizeof(xce), 0, 1);
+   _H(result) = xce;
CAMLreturn(result);
 }
 
-- 
2.34.1




[PATCH v2 0/5] tools/ocaml: build/compatibility fixes with OCaml 5.0 for Xen 4.17

2022-09-27 Thread Edwin Török
Changes to previous series:
* removed Dune patches from this series for now (that requires more work to 
work with osstest on Debian oldstable that won't be ready in time for 4.17)
* also updated xenctrl to work with no naked pointers mode (the only mode in 
OCaml 5.0)
* changed alloc_custom to use '0' and '1' instead of '1' and '128' for values 
that are singletons anyway

This can be tested with OCaml <5.0 (e.g. 4.13 or 4.14) with 
--enable-naked-pointer-checker
to find instances where naked pointers are used or by code review.
(Note that OCaml 5.0 won't have support for naked pointers at all, and thus
it doesn't have the checker either)

It would be good to get this included in Xen 4.17, especially since it
changes the internal ABI of xenctrl bindings.

Edwin Török (5):
  tools/ocaml/Makefile.rules: do not run ocamldep on distclean
  tools/ocaml/Makefile.rules: hide -include on *clean
  tools/ocaml/libs/eventchn: do not leak event channels and OCaml 5.0
compat
  tools/ocaml/libs/xc: OCaml 5.0 compatibility
  tools/ocaml/libs/{xb, mmap}: use Data_abstract_val wrapper

 tools/ocaml/Makefile.rules|  4 +--
 tools/ocaml/libs/eventchn/xeneventchn_stubs.c | 29 +--
 tools/ocaml/libs/mmap/xenmmap_stubs.c |  2 +-
 tools/ocaml/libs/xb/xs_ring_stubs.c   |  2 +-
 tools/ocaml/libs/xc/xenctrl_stubs.c   | 11 +++
 5 files changed, 37 insertions(+), 11 deletions(-)

-- 
2.34.1




[PATCH v2 5/5] tools/ocaml/libs/{xb, mmap}: use Data_abstract_val wrapper

2022-09-27 Thread Edwin Török
This is not strictly necessary since it is essentially a no-op
currently: a cast to void* and value*, even in OCaml 5.0.

However it does make it clearer that what we have here is not a regular
OCaml value, but one allocated with Abstract_tag or Custom_tag,
and follows the example from the manual more closely:
https://v2.ocaml.org/manual/intfc.html#ss:c-outside-head

It also makes it clearer that these modules have been reviewed for
compat with OCaml 5.0.

No functional change.

Signed-off-by: Edwin Török 
---
 tools/ocaml/libs/mmap/xenmmap_stubs.c | 2 +-
 tools/ocaml/libs/xb/xs_ring_stubs.c   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/ocaml/libs/mmap/xenmmap_stubs.c 
b/tools/ocaml/libs/mmap/xenmmap_stubs.c
index e2ce088e25..141dedb78c 100644
--- a/tools/ocaml/libs/mmap/xenmmap_stubs.c
+++ b/tools/ocaml/libs/mmap/xenmmap_stubs.c
@@ -28,7 +28,7 @@
 #include 
 #include 
 
-#define Intf_val(a) ((struct mmap_interface *) a)
+#define Intf_val(a) ((struct mmap_interface *) Data_abstract_val(a))
 
 static int mmap_interface_init(struct mmap_interface *intf,
int fd, int pflag, int mflag,
diff --git a/tools/ocaml/libs/xb/xs_ring_stubs.c 
b/tools/ocaml/libs/xb/xs_ring_stubs.c
index 7a91fdee75..cc9114029f 100644
--- a/tools/ocaml/libs/xb/xs_ring_stubs.c
+++ b/tools/ocaml/libs/xb/xs_ring_stubs.c
@@ -35,7 +35,7 @@
 #include 
 #include "mmap_stubs.h"
 
-#define GET_C_STRUCT(a) ((struct mmap_interface *) a)
+#define GET_C_STRUCT(a) ((struct mmap_interface *) Data_abstract_val(a))
 
 /*
  * Bytes_val has been introduced by Ocaml 4.06.1. So define our own version
-- 
2.34.1




[RFC PATCH] tools/configure: require OCaml >= 4.06.1 for oxenstored

2022-07-29 Thread Edwin Török
OCaml 4.06.1 is widely available in distributions: 
https://repology.org/project/ocaml/versions

oxenstored already includes some compatibility code to be able to run on
versions older than 4.06, however this is slightly less efficient than
just using the new features in 4.06 standard library:
https://lore.kernel.org/xen-devel/b94cd2ad099486678609909e12b045c54abb2f27.ca...@citrix.com/

The OCaml version in stubdom/ is unchanged for now as it is unclear how
this is used. Typically to run OCaml code as a stubdom one would use the mirage
tooling to build a unikernel, which handles cross-compilation using
Dune.
The unikernel itself also uses Solo5 instead of MiniOS, so the OCaml
code in stubdom/ is probably stale.

Signed-off-by: Edwin Török 
Cc: Christian Lindig 
---
 tools/configure| 2 +-
 tools/configure.ac | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/configure b/tools/configure
index 41deb7fb96..8f391e2da4 100755
--- a/tools/configure
+++ b/tools/configure
@@ -6765,7 +6765,7 @@ else
  -e 's/[^0-9]//g'`
 
 
-  ax_compare_version_B=`echo "4.02.0" | sed -e 's/\([0-9]*\)/Z\1Z/g' \
+  ax_compare_version_B=`echo "4.06.1" | sed -e 's/\([0-9]*\)/Z\1Z/g' \
  -e 's/Z\([0-9]\)Z/Z0\1Z/g' \
  -e 's/Z\([0-9][0-9]\)Z/Z0\1Z/g' \
  -e 's/Z\([0-9][0-9][0-9]\)Z/Z0\1Z/g' \
diff --git a/tools/configure.ac b/tools/configure.ac
index 32cbe6bd3c..7518199ec8 100644
--- a/tools/configure.ac
+++ b/tools/configure.ac
@@ -310,7 +310,7 @@ AS_IF([test "x$ocamltools" = "xy"], [
 AC_MSG_ERROR([Ocaml tools enabled, but missing ocamlopt or 
ocamlfind])])
 ocamltools="n"
 ], [
-AX_COMPARE_VERSION([$OCAMLVERSION], [lt], [4.02.0], [
+AX_COMPARE_VERSION([$OCAMLVERSION], [lt], [4.06.1], [
 AS_IF([test "x$enable_ocamltools" = "xyes"], [
 AC_MSG_ERROR([Your version of OCaml: $OCAMLVERSION is not 
supported])])
 ocamltools="n"
-- 
2.34.1




[PATCH v1 5/7] tools/ocaml: fix compiler warnings

2022-07-29 Thread Edwin Török
Fix compiler warnings about:
* unused value
* ambiguous documentation comment
* non-principal type inference (compiler version dependent)

No functional change.

Signed-off-by: Edwin Török 
---
 tools/ocaml/xenstored/connection.ml | 2 +-
 tools/ocaml/xenstored/process.ml| 5 +++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/tools/ocaml/xenstored/connection.ml 
b/tools/ocaml/xenstored/connection.ml
index 65f99ea6f2..a94d47cdc2 100644
--- a/tools/ocaml/xenstored/connection.ml
+++ b/tools/ocaml/xenstored/connection.ml
@@ -313,7 +313,7 @@ let is_bad con = match con.dom with None -> false | Some 
dom -> Domain.is_bad_do
 let has_extra_connection_data con =
let has_in = has_input con || has_partial_input con in
let has_out = has_output con in
-   let has_socket = con.dom = None in
+   let _has_socket = con.dom = None in
let has_nondefault_perms = make_perm con.dom <> con.perm in
has_in || has_out
(* TODO: what about SIGTERM, should use systemd to store FDS
diff --git a/tools/ocaml/xenstored/process.ml b/tools/ocaml/xenstored/process.ml
index 27790d4a5c..86eed02413 100644
--- a/tools/ocaml/xenstored/process.ml
+++ b/tools/ocaml/xenstored/process.ml
@@ -59,7 +59,7 @@ let split_one_path data con =
 
 let process_watch t cons =
let oldroot = t.Transaction.oldroot in
-   let newroot = Store.get_root t.store in
+   let newroot = Store.get_root t.Transaction.store in
let ops = Transaction.get_paths t |> List.rev in
let do_op_watch op cons =
let recurse, oldroot, root = match (fst op) with
@@ -491,7 +491,7 @@ let transaction_replay c t doms cons =
ignore @@ Connection.end_transaction c tid None
)
 
-let do_watch con t _domains cons data =
+let do_watch con _t _domains cons data =
let (node, token) =
match (split None '\000' data) with
| [node; token; ""]   -> node, token
@@ -651,6 +651,7 @@ let maybe_ignore_transaction = function
 
 
 let () = Printexc.record_backtrace true
+
 (**
  * Nothrow guarantee.
  *)
-- 
2.34.1




[PATCH v1 4/7] tools/ocaml: Makefile to drive dune

2022-07-29 Thread Edwin Török
create a separate Makefile that can be used to drive dune.

Usage:
`make -f Makefile.dune`

There are some files that need to be created by the Makefile based
build system (such as all the C code in $(XEN_ROOT)/tools/libs),
and those need to exist before dune runs.

Although it'd be possible to automatically call the necessary makefile
rules from dune, it wouldn't work reliably:
* dune uses sandboxing by default (only files declared or known as
  dependencies are visible to individual build commands,
  symlinks/hardlinks are used by dune to implement this)
* the dune builds always run in a _build subdir, and calling the
  makefiles from there would get the wrong XEN_ROOT set
* running the make command in the source tree would work, but dune still
  wouldn't immediately see the build dependencies since they wouldn't
  have been copied/linked under _build

The approach here is to:
* use the Makefile to build C-only prerequisites (i.e. most of Xen)
* use Dune only to build the OCaml parts once the C prerequisites exist
* dune has dependencies declared on the C bits, so if they are missing
  you will get an error about a missing rule to create them instead of a
  cryptic compilation error
* dune is still optional - the old Makefile based buildsystem is still
  there for now
* use dune exclusively for new code going forward (e.g. OCaml test-suites)

The workspace file needs to be generated by make because this currently
cannot be generated by dune, and it doesn't support including external
files. But could be generated by configure?

LD_LIBRARY_PATH needs to be set, because even with -Wl,-rpath
executables wouldn't be able to run using the just-built libraries,
unless we'd also link all the transitive dependencies of libs.

No functional change.

Signed-off-by: Edwin Török 
---
 Makefile  |  5 ++
 tools/ocaml/Makefile.dune | 88 +++
 tools/ocaml/dune-workspace.dev.in |  2 +
 tools/ocaml/dune-workspace.in | 18 +++
 4 files changed, 113 insertions(+)
 create mode 100644 tools/ocaml/Makefile.dune
 create mode 100644 tools/ocaml/dune-workspace.dev.in
 create mode 100644 tools/ocaml/dune-workspace.in

diff --git a/Makefile b/Makefile
index b93b22c752..ddb33c3555 100644
--- a/Makefile
+++ b/Makefile
@@ -68,6 +68,11 @@ build-tools-oxenstored: build-tools-public-headers
$(MAKE) -s -C tools/libs
$(MAKE) -C tools/ocaml build-tools-oxenstored
 
+.PHONY: build-tools-oxenstored-prepare
+build-tools-oxenstored-prepare: build-tools-public-headers
+   test -f tools/config.status || (cd tools && ./configure 
--with-xenstored=oxenstored)
+   $(MAKE) -C tools/libs V=
+
 .PHONY: build-stubdom
 build-stubdom: mini-os-dir build-tools-public-headers
$(MAKE) -C stubdom build
diff --git a/tools/ocaml/Makefile.dune b/tools/ocaml/Makefile.dune
new file mode 100644
index 00..eca9cac0ca
--- /dev/null
+++ b/tools/ocaml/Makefile.dune
@@ -0,0 +1,88 @@
+XEN_ROOT = $(CURDIR)/../..
+all: dune-all-check
+
+# Dune by default uses all available CPUs. Make doesn't.
+# Query the available CPUs and use all available for any of the make rules we 
call out to.
+# -O is also needed with parallel make such that the build error and the build 
command causing
+#  the error are close together and not interspersed with other output
+NPROC=$(shell getconf _NPROCESSORS_ONLN)
+MAKEN=$(MAKE) -j$(NPROC) -O
+
+# We want to link and use the Xen libraries built locally
+# without installing them system-wide
+# (the system-wide one installed from packages will likely be too old and not 
match the locally
+# built one anyway).
+#
+# Set LIBRARY_PATH and LD_LIBRARY_PATH so that the linker
+# finds the proper libraries and the various dune commands
+# work (e.g. running tests, utop, etc.).
+#
+# The Makefile based buildsystem would use -Wl,-rpath-link= here,
+# but that only works during linking, not runtime.
+# There is a -Wl,-rpath= that can be used, but that only works
+# for libraries linked directly to the main executable:
+# the dependencies of those libraries won't get found on the rpath
+# (the rpath of the executable is apparently not used during that search).
+#
+# Use environment variables, because that way we don't make any permanent 
alterations (rpath)
+# to the executable, so once installed system-wide it won't refer to build 
paths anymore.
+#
+# Dune cannot be used to generate this file: the env-vars stanza doesn't 
support %{read:}, :include,
+# and dune-workspace doesn't support (include) stanzas.
+# So for now generate it from this Makefile
+# Cannot start with comment, so add auto-generated comment at the end
+LIB_DIRS=$(abspath $(wildcard ../libs/*/.))
+LIBRARY_PATH=$(subst $(eval) ,:,$(LIB_DIRS))
+../dune-workspace ../dune-workspace.dev: dune-workspace.in 
dune-workspace.dev.in Makefile.dune
+   @( sed -e "s|@LIBRARY_PATH@|$(LIBRARY_PATH)|" <$< \
+   && echo "; DO NOT EDIT: autogenerated fro

[PATCH v1 1/7] tools/ocaml/Makefile: do not run ocamldep during make clean

2022-07-29 Thread Edwin Török
Trying to include .ocamldep.make will cause it to be generated if it
doesn't exist.
We do not want this during make clean: we would remove it anyway.

Speeds up make clean.

Before (measured on f732240fd3bac25116151db5ddeb7203b62e85ce, July 2022):
```
Parsing 
/home/edwin/xen2/tools/ocaml/libs/xl/../../../../tools/libs/light/libxl_types.idl
Parsing 
/home/edwin/xen2/tools/ocaml/libs/xl/../../../../tools/libs/light/libxl_types.idl
Parsing 
/home/edwin/xen2/tools/ocaml/libs/xl/../../../../tools/libs/light/libxl_types.idl
Parsing 
/home/edwin/xen2/tools/ocaml/libs/xl/../../../../tools/libs/light/libxl_types.idl
Parsing 
/home/edwin/xen2/tools/ocaml/libs/xl/../../../../tools/libs/light/libxl_types.idl

 Performance counter stats for 'make clean -j8 -s' (5 runs):

4.2233 +- 0.0208 seconds time elapsed  ( +-  0.49% )
```

After:
```
perf stat -r 5 --null make clean -j8 -s

 Performance counter stats for 'make clean -j8 -s' (5 runs):

2.7325 +- 0.0138 seconds time elapsed  ( +-  0.51% )
```

No functional change.

Signed-off-by: Edwin Török 
---
 tools/ocaml/Makefile.rules | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/ocaml/Makefile.rules b/tools/ocaml/Makefile.rules
index 7e4db457a1..d368308d9b 100644
--- a/tools/ocaml/Makefile.rules
+++ b/tools/ocaml/Makefile.rules
@@ -44,8 +44,10 @@ META: META.in
 
 ALL_OCAML_OBJ_SOURCES=$(addsuffix .ml, $(ALL_OCAML_OBJS))
 
+ifneq ($(MAKECMDGOALS),clean)
 .ocamldep.make: $(ALL_OCAML_OBJ_SOURCES) Makefile 
$(OCAML_TOPLEVEL)/Makefile.rules
$(call quiet-command, $(OCAMLDEP) $(ALL_OCAML_OBJ_SOURCES) *.mli 
$o,MLDEP,)
+endif
 
 clean: $(CLEAN_HOOKS)
$(Q)rm -f .*.d *.o *.so *.a *.cmo *.cmi *.cma *.cmx *.cmxa *.annot 
*.spot *.spit $(LIBS) $(PROGRAMS) $(GENERATED_FILES) .ocamldep.make META
-- 
2.34.1




[PATCH v1 3/7] tools/ocaml/*/dune: dune based build system

2022-07-29 Thread Edwin Török
Based on Christian Lindig's work.

Initially this will be used to build unit tests, and to make development
easier.

Dune supports proper incremental builds and editor integration with
merlin/LSP.

For now the Makefile based build system is retained too: this is not a
hard dependency on Dune.

Using version 2.1 of Dune build language here, because that is the one
available in Ubuntu Focal (part of the CI here).

No functional change.

Signed-off-by: Edwin Török 
---
 tools/.gitignore   |  7 +
 tools/dune |  5 
 tools/dune-project |  1 +
 tools/ocaml/dune-project   | 27 ++
 tools/ocaml/libs/eventchn/dune | 11 
 tools/ocaml/libs/mmap/dune |  9 ++
 tools/ocaml/libs/xb/dune   | 10 +++
 tools/ocaml/libs/xc/dune   | 16 +++
 tools/ocaml/libs/xs/dune   | 15 ++
 tools/ocaml/xenstored/dune | 51 ++
 10 files changed, 152 insertions(+)
 create mode 100644 tools/.gitignore
 create mode 100644 tools/dune
 create mode 100644 tools/dune-project
 create mode 100644 tools/ocaml/dune-project
 create mode 100644 tools/ocaml/libs/eventchn/dune
 create mode 100644 tools/ocaml/libs/mmap/dune
 create mode 100644 tools/ocaml/libs/xb/dune
 create mode 100644 tools/ocaml/libs/xc/dune
 create mode 100644 tools/ocaml/libs/xs/dune
 create mode 100644 tools/ocaml/xenstored/dune

diff --git a/tools/.gitignore b/tools/.gitignore
new file mode 100644
index 00..c211749a3b
--- /dev/null
+++ b/tools/.gitignore
@@ -0,0 +1,7 @@
+dune-workspace*
+_build/
+.merlin
+*.h.gch
+*.opam
+ocaml/*.install
+include/_xentoolcore_list.h
diff --git a/tools/dune b/tools/dune
new file mode 100644
index 00..febbd078f0
--- /dev/null
+++ b/tools/dune
@@ -0,0 +1,5 @@
+; only look inside ocaml and include subdirectory, speeds up the build
+; since dune doesn't need to copy/hash/monitor all the other files
+(dirs ocaml)
+
+(data_only_dirs include libs)
diff --git a/tools/dune-project b/tools/dune-project
new file mode 100644
index 00..cd8d4e3d86
--- /dev/null
+++ b/tools/dune-project
@@ -0,0 +1 @@
+(lang dune 2.1)
diff --git a/tools/ocaml/dune-project b/tools/ocaml/dune-project
new file mode 100644
index 00..1dae7b0acb
--- /dev/null
+++ b/tools/ocaml/dune-project
@@ -0,0 +1,27 @@
+(lang dune 2.1)
+
+(name xen)
+
+(formatting (enabled_for dune))
+(generate_opam_files true)
+
+(maintainers christian.lin...@citrix.com)
+(license LGPL)
+
+(package
+ (name xen)
+ (synopsis "Xen interfaces")
+ (depends
+  base-unix
+  (dune (>= 2.1))
+ )
+)
+
+(package
+ (name xenstored)
+ (synopsis "In-memory key-value store for the Xen hypervisor")
+ (depends
+  base-unix
+  (dune (>= 2.1))
+ )
+)
diff --git a/tools/ocaml/libs/eventchn/dune b/tools/ocaml/libs/eventchn/dune
new file mode 100644
index 00..4468f2e769
--- /dev/null
+++ b/tools/ocaml/libs/eventchn/dune
@@ -0,0 +1,11 @@
+(library
+ (foreign_stubs
+  (language c)
+  (names xeneventchn_stubs)
+  (extra_deps ../../../include/xen/xen.h ../../../libs/evtchn/libxenevtchn.so)
+  (include_dirs ../../../include))
+ (name xeneventchn)
+ (public_name xen.eventchn)
+ (libraries unix)
+ (no_dynlink)
+ (c_library_flags -lxenevtchn))
diff --git a/tools/ocaml/libs/mmap/dune b/tools/ocaml/libs/mmap/dune
new file mode 100644
index 00..57a8ab5b9b
--- /dev/null
+++ b/tools/ocaml/libs/mmap/dune
@@ -0,0 +1,9 @@
+(library
+ (foreign_stubs
+  (language c)
+  (names xenmmap_stubs))
+ (name xenmmap)
+ (public_name xen.mmap)
+ (libraries unix)
+ (no_dynlink)
+ (install_c_headers mmap_stubs))
diff --git a/tools/ocaml/libs/xb/dune b/tools/ocaml/libs/xb/dune
new file mode 100644
index 00..13a507ea87
--- /dev/null
+++ b/tools/ocaml/libs/xb/dune
@@ -0,0 +1,10 @@
+(library
+ (foreign_stubs
+  (language c)
+  (extra_deps ../../../include/xen/xen.h)
+  (include_dirs ../../../include)
+  (names xenbus_stubs xs_ring_stubs))
+ (name xenbus)
+ (public_name xen.bus)
+ (no_dynlink)
+ (libraries unix xenmmap))
diff --git a/tools/ocaml/libs/xc/dune b/tools/ocaml/libs/xc/dune
new file mode 100644
index 00..6f9450cd27
--- /dev/null
+++ b/tools/ocaml/libs/xc/dune
@@ -0,0 +1,16 @@
+(rule
+ (with-stdout-to
+  xenctrl_abi_check.h
+  (run perl -w %{dep:abi-check} %{dep:xenctrl_stubs.c} %{dep:xenctrl.ml})))
+
+(library
+ (foreign_stubs
+  (language c)
+  (names xenctrl_stubs)
+  (extra_deps ../../../include/xen/xen.h ../../../libs/ctrl/libxenctrl.so)
+  (include_dirs ../../../include))
+ (name xenctrl)
+ (public_name xen.ctrl)
+ (libraries unix xenmmap)
+ (no_dynlink)
+ (c_library_flags -lxenctrl -lxenguest))
diff --git a/tools/ocaml/libs/xs/dune b/tools/ocaml/libs/xs/dune
new file mode 100644
index 00..086259f51d
--- /dev/null
+++ b/tools/ocaml/libs/xs/dune
@@ -0,0 +1,15 @@
+; fallback mode: the files may have been generated by configure already
+
+(rule
+ (targets paths.ml)
+ (deps paths.ml.in)
+ (mode f

[PATCH v1 6/7] tools/ocaml/libs/xb: hide type of Xb.t

2022-07-29 Thread Edwin Török
The only user of 'xb' that I can find is in-tree oxenstored.
Other code (e.g. xenopsd) would use the mirage 'xenstore' implementation
instead, so changing the API here shouldn't require anyone to update
their code.

Hiding the type will make it easier to change the implementation
in the future without breaking code that relies on it.

No functional change.

Signed-off-by: Edwin Török 
---
 tools/ocaml/libs/xb/xb.ml   | 3 +++
 tools/ocaml/libs/xb/xb.mli  | 9 ++---
 tools/ocaml/xenstored/connection.ml | 8 ++--
 3 files changed, 7 insertions(+), 13 deletions(-)

diff --git a/tools/ocaml/libs/xb/xb.ml b/tools/ocaml/libs/xb/xb.ml
index 104d319d77..8404ddd8a6 100644
--- a/tools/ocaml/libs/xb/xb.ml
+++ b/tools/ocaml/libs/xb/xb.ml
@@ -196,6 +196,9 @@ let peek_output con = Queue.peek con.pkt_out
 let input_len con = Queue.length con.pkt_in
 let has_in_packet con = Queue.length con.pkt_in > 0
 let get_in_packet con = Queue.pop con.pkt_in
+let has_partial_input con = match con.partial_in with
+   | HaveHdr _ -> true
+   | NoHdr (n, _) -> n < Partial.header_size ()
 let has_more_input con =
match con.backend with
| Fd _ -> false
diff --git a/tools/ocaml/libs/xb/xb.mli b/tools/ocaml/libs/xb/xb.mli
index 3a00da6cdd..794e35bb34 100644
--- a/tools/ocaml/libs/xb/xb.mli
+++ b/tools/ocaml/libs/xb/xb.mli
@@ -66,13 +66,7 @@ type backend_mmap = {
 type backend_fd = { fd : Unix.file_descr; }
 type backend = Fd of backend_fd | Xenmmap of backend_mmap
 type partial_buf = HaveHdr of Partial.pkt | NoHdr of int * bytes
-type t = {
-  backend : backend;
-  pkt_in : Packet.t Queue.t;
-  pkt_out : Packet.t Queue.t;
-  mutable partial_in : partial_buf;
-  mutable partial_out : string;
-}
+type t
 val init_partial_in : unit -> partial_buf
 val reconnect : t -> unit
 val queue : t -> Packet.t -> unit
@@ -97,6 +91,7 @@ val has_output : t -> bool
 val peek_output : t -> Packet.t
 val input_len : t -> int
 val has_in_packet : t -> bool
+val has_partial_input : t -> bool
 val get_in_packet : t -> Packet.t
 val has_more_input : t -> bool
 val is_selectable : t -> bool
diff --git a/tools/ocaml/xenstored/connection.ml 
b/tools/ocaml/xenstored/connection.ml
index a94d47cdc2..0ce54cd7f9 100644
--- a/tools/ocaml/xenstored/connection.ml
+++ b/tools/ocaml/xenstored/connection.ml
@@ -125,9 +125,7 @@ let get_perm con =
 let set_target con target_domid =
con.perm <- Perms.Connection.set_target (get_perm con) 
~perms:[Perms.READ; Perms.WRITE] target_domid
 
-let is_backend_mmap con = match con.xb.Xenbus.Xb.backend with
-   | Xenbus.Xb.Xenmmap _ -> true
-   | _ -> false
+let is_backend_mmap con = Xenbus.Xb.is_mmap con.xb
 
 let send_reply con tid rid ty data =
if (String.length data) > xenstore_payload_max && (is_backend_mmap con) 
then
@@ -280,9 +278,7 @@ let get_transaction con tid =
 
 let do_input con = Xenbus.Xb.input con.xb
 let has_input con = Xenbus.Xb.has_in_packet con.xb
-let has_partial_input con = match con.xb.Xenbus.Xb.partial_in with
-   | HaveHdr _ -> true
-   | NoHdr (n, _) -> n < Xenbus.Partial.header_size ()
+let has_partial_input con = Xenbus.Xb.has_partial_input con.xb
 let pop_in con = Xenbus.Xb.get_in_packet con.xb
 let has_more_input con = Xenbus.Xb.has_more_input con.xb
 
-- 
2.34.1




[PATCH v1 2/7] tools/ocaml/*/Makefile: generate paths.ml from configure

2022-07-29 Thread Edwin Török
paths.ml contains various paths known to configure,
and currently is generated via a Makefile rule.
Simplify this and generate it through configure, similar to how
oxenstored.conf is generated from oxenstored.conf.in.

This will allow reusing the generated file more easily with Dune.

No functional change.

Signed-off-by: Edwin Török 
---
 tools/configure   | 4 +++-
 tools/configure.ac| 2 ++
 tools/ocaml/libs/xs/Makefile  | 5 -
 tools/ocaml/libs/xs/paths.ml.in   | 1 +
 tools/ocaml/xenstored/Makefile| 5 -
 tools/ocaml/xenstored/paths.ml.in | 4 
 6 files changed, 10 insertions(+), 11 deletions(-)
 create mode 100644 tools/ocaml/libs/xs/paths.ml.in
 create mode 100644 tools/ocaml/xenstored/paths.ml.in

diff --git a/tools/configure b/tools/configure
index a052c186a5..41deb7fb96 100755
--- a/tools/configure
+++ b/tools/configure
@@ -2453,7 +2453,7 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu
 
 
 
-ac_config_files="$ac_config_files ../config/Tools.mk 
hotplug/FreeBSD/rc.d/xencommons hotplug/FreeBSD/rc.d/xendriverdomain 
hotplug/Linux/init.d/sysconfig.xencommons 
hotplug/Linux/init.d/sysconfig.xendomains hotplug/Linux/init.d/xen-watchdog 
hotplug/Linux/init.d/xencommons hotplug/Linux/init.d/xendomains 
hotplug/Linux/init.d/xendriverdomain hotplug/Linux/launch-xenstore 
hotplug/Linux/vif-setup hotplug/Linux/xen-hotplug-common.sh 
hotplug/Linux/xendomains hotplug/NetBSD/rc.d/xencommons 
hotplug/NetBSD/rc.d/xendriverdomain ocaml/xenstored/oxenstored.conf"
+ac_config_files="$ac_config_files ../config/Tools.mk 
hotplug/FreeBSD/rc.d/xencommons hotplug/FreeBSD/rc.d/xendriverdomain 
hotplug/Linux/init.d/sysconfig.xencommons 
hotplug/Linux/init.d/sysconfig.xendomains hotplug/Linux/init.d/xen-watchdog 
hotplug/Linux/init.d/xencommons hotplug/Linux/init.d/xendomains 
hotplug/Linux/init.d/xendriverdomain hotplug/Linux/launch-xenstore 
hotplug/Linux/vif-setup hotplug/Linux/xen-hotplug-common.sh 
hotplug/Linux/xendomains hotplug/NetBSD/rc.d/xencommons 
hotplug/NetBSD/rc.d/xendriverdomain ocaml/libs/xs/paths.ml 
ocaml/xenstored/paths.ml ocaml/xenstored/oxenstored.conf"
 
 ac_config_headers="$ac_config_headers config.h"
 
@@ -10935,6 +10935,8 @@ do
 "hotplug/Linux/xendomains") CONFIG_FILES="$CONFIG_FILES 
hotplug/Linux/xendomains" ;;
 "hotplug/NetBSD/rc.d/xencommons") CONFIG_FILES="$CONFIG_FILES 
hotplug/NetBSD/rc.d/xencommons" ;;
 "hotplug/NetBSD/rc.d/xendriverdomain") CONFIG_FILES="$CONFIG_FILES 
hotplug/NetBSD/rc.d/xendriverdomain" ;;
+"ocaml/libs/xs/paths.ml") CONFIG_FILES="$CONFIG_FILES 
ocaml/libs/xs/paths.ml" ;;
+"ocaml/xenstored/paths.ml") CONFIG_FILES="$CONFIG_FILES 
ocaml/xenstored/paths.ml" ;;
 "ocaml/xenstored/oxenstored.conf") CONFIG_FILES="$CONFIG_FILES 
ocaml/xenstored/oxenstored.conf" ;;
 "config.h") CONFIG_HEADERS="$CONFIG_HEADERS config.h" ;;
 "hotplug/Linux/systemd/proc-xen.mount") CONFIG_FILES="$CONFIG_FILES 
hotplug/Linux/systemd/proc-xen.mount" ;;
diff --git a/tools/configure.ac b/tools/configure.ac
index 1094d896fc..32cbe6bd3c 100644
--- a/tools/configure.ac
+++ b/tools/configure.ac
@@ -21,6 +21,8 @@ hotplug/Linux/xen-hotplug-common.sh
 hotplug/Linux/xendomains
 hotplug/NetBSD/rc.d/xencommons
 hotplug/NetBSD/rc.d/xendriverdomain
+ocaml/libs/xs/paths.ml
+ocaml/xenstored/paths.ml
 ocaml/xenstored/oxenstored.conf
 ])
 AC_CONFIG_HEADERS([config.h])
diff --git a/tools/ocaml/libs/xs/Makefile b/tools/ocaml/libs/xs/Makefile
index e934bbb550..e160e6a711 100644
--- a/tools/ocaml/libs/xs/Makefile
+++ b/tools/ocaml/libs/xs/Makefile
@@ -44,8 +44,3 @@ uninstall:
$(OCAMLFIND) remove -destdir $(OCAMLDESTDIR) xenstore
 
 include $(OCAML_TOPLEVEL)/Makefile.rules
-
-genpath-target = $(call buildmakevars2module,paths.ml)
-$(eval $(genpath-target))
-
-GENERATED_FILES += paths.ml
diff --git a/tools/ocaml/libs/xs/paths.ml.in b/tools/ocaml/libs/xs/paths.ml.in
new file mode 100644
index 00..c067f8d012
--- /dev/null
+++ b/tools/ocaml/libs/xs/paths.ml.in
@@ -0,0 +1 @@
+let xen_run_stored = "@XEN_RUN_STORED@"
diff --git a/tools/ocaml/xenstored/Makefile b/tools/ocaml/xenstored/Makefile
index 0b5711b507..6f7333926e 100644
--- a/tools/ocaml/xenstored/Makefile
+++ b/tools/ocaml/xenstored/Makefile
@@ -93,8 +93,3 @@ uninstall:
rm -f $(DESTDIR)$(sbindir)/oxenstored
 
 include $(OCAML_TOPLEVEL)/Makefile.rules
-
-genpath-target = $(call buildmakevars2module,paths.ml)
-$(eval $(genpath-target))
-
-GENERATED_FILES += paths.ml
diff --git a/tools/ocaml/xenstored/paths.ml.in 
b/tools/ocaml/xenstored/paths.ml.in
new file mode 100644
index 00..37949dc8f3
--- /dev/null
+++ b/tools/ocaml/xenstored/paths.ml.in
@@ -0,0 +1,4 @@
+let xen_log_dir = "@XEN_LOG_DIR@"
+let xen_config_dir = "@XEN_CONFIG_DIR@"
+let xen_run_dir = "@XEN_RUN_DIR@"
+let xen_run_stored = "@XEN_RUN_STORED@"
-- 
2.34.1




[PATCH v1 7/7] tools/ocaml/libs/eventchn: do not leak event channels and OCaml 5.0 compat

2022-07-29 Thread Edwin Török
Add a finalizer on the event channel value, so that it calls
`xenevtchn_close` when the value would be GCed.

In practice oxenstored seems to be the only user of this,
and it creates a single global event channel only,
but freeing this could still be useful when run with OCAMLRUNPARAM=c

The code was previously casting a C pointer to an OCaml value,
which should be avoided: OCaml 5.0 won't support it.
(all "naked" C pointers must be wrapped inside an OCaml value,
 either an Abstract tag, or Nativeint, see the manual
 https://ocaml.org/manual/intfc.html#ss:c-outside-head)

Signed-off-by: Edwin Török 
---
 tools/ocaml/libs/eventchn/xeneventchn_stubs.c | 29 +--
 1 file changed, 27 insertions(+), 2 deletions(-)

diff --git a/tools/ocaml/libs/eventchn/xeneventchn_stubs.c 
b/tools/ocaml/libs/eventchn/xeneventchn_stubs.c
index f889a7a2e4..c0d57e2954 100644
--- a/tools/ocaml/libs/eventchn/xeneventchn_stubs.c
+++ b/tools/ocaml/libs/eventchn/xeneventchn_stubs.c
@@ -33,7 +33,30 @@
 #include 
 #include 
 
-#define _H(__h) ((xenevtchn_handle *)(__h))
+/* We want to close the event channel when it is no longer in use,
+   which can only be done safely with a finalizer.
+   Event channels are typically long lived, so we don't need tighter control 
over resource deallocation.
+   Use a custom block
+*/
+
+/* Access the xenevtchn_t* part of the OCaml custom block */
+#define _H(__h) (*((xenevtchn_handle**)Data_custom_val(__h)))
+
+static void stub_evtchn_finalize(value v)
+{
+   /* docs say to not use any CAMLparam* macros here */
+   xenevtchn_close(_H(v));
+}
+
+static struct custom_operations xenevtchn_ops = {
+   "xenevtchn",
+   stub_evtchn_finalize,
+   custom_compare_default, /* raises Failure, cannot compare */
+   custom_hash_default, /* ignored */
+   custom_serialize_default, /* raises Failure, can't serialize */
+   custom_deserialize_default, /* raises Failure, can't deserialize */
+   custom_compare_ext_default /* raises Failure */
+};
 
 CAMLprim value stub_eventchn_init(void)
 {
@@ -48,7 +71,9 @@ CAMLprim value stub_eventchn_init(void)
if (xce == NULL)
caml_failwith("open failed");
 
-   result = (value)xce;
+   /* contains file descriptors, trigger full GC at least every 128 
allocations */
+   result = caml_alloc_custom(&xenevtchn_ops, sizeof(xce), 1, 128);
+   _H(result) = xce;
CAMLreturn(result);
 }
 
-- 
2.34.1




[PATCH v1 0/7] tools/ocaml code and build cleanups

2022-07-29 Thread Edwin Török
Various OCaml code cleanups to make building and working on Oxenstored easier,
including compatibility with newer language versions.
This does not yet change the minimum version of OCaml.

A version of this series in a git repository is publicly available at:
https://github.com/edwintorok/xen.git
https://github.com/edwintorok/xen/compare/private/edvint/public?expand=1

Edwin Török (7):
  tools/ocaml/Makefile: do not run ocamldep during make clean
  tools/ocaml/*/Makefile: generate paths.ml from configure
  tools/ocaml/*/dune: dune based build system
  tools/ocaml: Makefile to drive dune
  tools/ocaml: fix compiler warnings
  tools/ocaml/libs/xb: hide type of Xb.t
  tools/ocaml/libs/eventchn: do not leak event channels and OCaml 5.0
compat

 Makefile  |  5 ++
 tools/.gitignore  |  7 ++
 tools/configure   |  4 +-
 tools/configure.ac|  2 +
 tools/dune|  5 ++
 tools/dune-project|  1 +
 tools/ocaml/Makefile.dune | 88 +++
 tools/ocaml/Makefile.rules|  2 +
 tools/ocaml/dune-project  | 27 ++
 tools/ocaml/dune-workspace.dev.in |  2 +
 tools/ocaml/dune-workspace.in | 18 
 tools/ocaml/libs/eventchn/dune| 11 +++
 tools/ocaml/libs/eventchn/xeneventchn_stubs.c | 29 +-
 tools/ocaml/libs/mmap/dune|  9 ++
 tools/ocaml/libs/xb/dune  | 10 +++
 tools/ocaml/libs/xb/xb.ml |  3 +
 tools/ocaml/libs/xb/xb.mli|  9 +-
 tools/ocaml/libs/xc/dune  | 16 
 tools/ocaml/libs/xs/Makefile  |  5 --
 tools/ocaml/libs/xs/dune  | 15 
 tools/ocaml/libs/xs/paths.ml.in   |  1 +
 tools/ocaml/xenstored/Makefile|  5 --
 tools/ocaml/xenstored/connection.ml   | 10 +--
 tools/ocaml/xenstored/dune| 51 +++
 tools/ocaml/xenstored/paths.ml.in |  4 +
 tools/ocaml/xenstored/process.ml  |  5 +-
 26 files changed, 315 insertions(+), 29 deletions(-)
 create mode 100644 tools/.gitignore
 create mode 100644 tools/dune
 create mode 100644 tools/dune-project
 create mode 100644 tools/ocaml/Makefile.dune
 create mode 100644 tools/ocaml/dune-project
 create mode 100644 tools/ocaml/dune-workspace.dev.in
 create mode 100644 tools/ocaml/dune-workspace.in
 create mode 100644 tools/ocaml/libs/eventchn/dune
 create mode 100644 tools/ocaml/libs/mmap/dune
 create mode 100644 tools/ocaml/libs/xb/dune
 create mode 100644 tools/ocaml/libs/xc/dune
 create mode 100644 tools/ocaml/libs/xs/dune
 create mode 100644 tools/ocaml/libs/xs/paths.ml.in
 create mode 100644 tools/ocaml/xenstored/dune
 create mode 100644 tools/ocaml/xenstored/paths.ml.in

-- 
2.34.1




[PATCH v2] x86/msr: fix X2APIC_LAST

2022-07-26 Thread Edwin Török
The latest Intel manual now says the X2APIC reserved range is only
0x800 to 0x8ff (NOT 0xbff).
This changed between SDM 68 (Nov 2018) and SDM 69 (Jan 2019).
The AMD manual documents 0x800-0x8ff too.

There are non-X2APIC MSRs in the 0x900-0xbff range now:
e.g. 0x981 is IA32_TME_CAPABILITY, an architectural MSR.

The new MSR in this range appears to have been introduced in Icelake,
so this commit should be backported to Xen versions supporting Icelake.

Backport: 4.13+

Signed-off-by: Edwin Török 
---

Notes:
Changed since v1:
* include version of Intel SDM where the change occurred
* remove opencoded MSR_X2APIC_FIRST + 0xff

 xen/arch/x86/hvm/vmx/vmx.c   | 4 ++--
 xen/arch/x86/include/asm/msr-index.h | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c
index 47554cc004..17e103188a 100644
--- a/xen/arch/x86/hvm/vmx/vmx.c
+++ b/xen/arch/x86/hvm/vmx/vmx.c
@@ -3397,7 +3397,7 @@ void vmx_vlapic_msr_changed(struct vcpu *v)
 if ( cpu_has_vmx_apic_reg_virt )
 {
 for ( msr = MSR_X2APIC_FIRST;
-  msr <= MSR_X2APIC_FIRST + 0xff; msr++ )
+  msr <= MSR_X2APIC_LAST; msr++ )
 vmx_clear_msr_intercept(v, msr, VMX_MSR_R);
 
 vmx_set_msr_intercept(v, MSR_X2APIC_PPR, VMX_MSR_R);
@@ -3418,7 +3418,7 @@ void vmx_vlapic_msr_changed(struct vcpu *v)
 if ( !(v->arch.hvm.vmx.secondary_exec_control &
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE) )
 for ( msr = MSR_X2APIC_FIRST;
-  msr <= MSR_X2APIC_FIRST + 0xff; msr++ )
+  msr <= MSR_X2APIC_LAST; msr++ )
 vmx_set_msr_intercept(v, msr, VMX_MSR_RW);
 
 vmx_update_secondary_exec_control(v);
diff --git a/xen/arch/x86/include/asm/msr-index.h 
b/xen/arch/x86/include/asm/msr-index.h
index 8cab8736d8..1a928ea6af 100644
--- a/xen/arch/x86/include/asm/msr-index.h
+++ b/xen/arch/x86/include/asm/msr-index.h
@@ -148,7 +148,7 @@
 #define MSR_INTERRUPT_SSP_TABLE 0x06a8
 
 #define MSR_X2APIC_FIRST0x0800
-#define MSR_X2APIC_LAST 0x0bff
+#define MSR_X2APIC_LAST 0x08ff
 
 #define MSR_X2APIC_TPR  0x0808
 #define MSR_X2APIC_PPR  0x080a
-- 
2.34.1




[PATCH] x86/msr: fix X2APIC_LAST

2022-07-26 Thread Edwin Török
The latest Intel manual now says the X2APIC reserved range is only
0x800 to 0x8ff (NOT 0xbff). The AMD manual documents 0x800-0x8ff too.

There are non-X2APIC MSRs in the 0x900-0xbff range now:
e.g. 0x981 is IA32_TME_CAPABILITY, an architectural MSR.

The new MSR in this range appears to have been introduced in Icelake,
so this commit should be backported to Xen versions supporting Icelake.

Backport: 4.13+

Signed-off-by: Edwin Török 
---
 xen/arch/x86/include/asm/msr-index.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/xen/arch/x86/include/asm/msr-index.h 
b/xen/arch/x86/include/asm/msr-index.h
index 8cab8736d8..1a928ea6af 100644
--- a/xen/arch/x86/include/asm/msr-index.h
+++ b/xen/arch/x86/include/asm/msr-index.h
@@ -148,7 +148,7 @@
 #define MSR_INTERRUPT_SSP_TABLE 0x06a8
 
 #define MSR_X2APIC_FIRST0x0800
-#define MSR_X2APIC_LAST 0x0bff
+#define MSR_X2APIC_LAST 0x08ff
 
 #define MSR_X2APIC_TPR  0x0808
 #define MSR_X2APIC_PPR  0x080a
-- 
2.34.1




[PATCH] tools/ocaml/libs/xc: add OCaml stubs to query CPU policy

2021-06-18 Thread Edwin Török
Introduces the following functions in Xenctrl and associated types:
get_system_cpu_policy,
cpu_policy_to_featureset,
string_of_xen_cpu_policy_index

These are wrappers around the existing C functions in xenctrl.h,
that will be used by xenopsd initially.

The declaration-after-statement warning is disabled (by adding
-Wno-declaration-after-statement to CFLAGS) to allow mixing
declarations and code. This simplifies writing the stubs
by using variable length arrays on the stack instead of
allocating/freeing memory
(which would require additional error-handling logic).

Signed-off-by: Edwin Török 
---
 tools/ocaml/libs/xc/Makefile|   2 +-
 tools/ocaml/libs/xc/xenctrl.ml  |  37 ++
 tools/ocaml/libs/xc/xenctrl.mli |  71 ++
 tools/ocaml/libs/xc/xenctrl_stubs.c | 195 
 4 files changed, 304 insertions(+), 1 deletion(-)

diff --git a/tools/ocaml/libs/xc/Makefile b/tools/ocaml/libs/xc/Makefile
index b6da4fdbaf..64dca99613 100644
--- a/tools/ocaml/libs/xc/Makefile
+++ b/tools/ocaml/libs/xc/Makefile
@@ -3,7 +3,7 @@ XEN_ROOT=$(TOPLEVEL)/../..
 include $(TOPLEVEL)/common.make
 
 CFLAGS += -I../mmap $(CFLAGS_libxenctrl) $(CFLAGS_libxenguest)
-CFLAGS += $(APPEND_CFLAGS)
+CFLAGS += $(APPEND_CFLAGS) -Wno-declaration-after-statement
 OCAMLINCLUDE += -I ../mmap
 
 OBJS = xenctrl
diff --git a/tools/ocaml/libs/xc/xenctrl.ml b/tools/ocaml/libs/xc/xenctrl.ml
index a5588c643f..fa2cea5091 100644
--- a/tools/ocaml/libs/xc/xenctrl.ml
+++ b/tools/ocaml/libs/xc/xenctrl.ml
@@ -286,6 +286,43 @@ external version_capabilities: handle -> string =
 type featureset_index = Featureset_raw | Featureset_host | Featureset_pv | 
Featureset_hvm
 external get_cpu_featureset : handle -> featureset_index -> int64 array = 
"stub_xc_get_cpu_featureset"
 
+(* order must match the order in Val_cpuid_leaf *)
+type xen_cpuid_leaf = {
+  leaf: int64;
+  subleaf: int64;
+  a: int64;
+  b: int64;
+  c: int64;
+  d: int64;
+}
+
+(* order must match the order in Val_msr_entry *)
+type xen_msr_entry = {
+  idx: int64;
+  flags: int64;
+  value: int64; (* val is a keyword, using 'value' *)
+}
+
+type xen_cpu_policy = {
+  leaves: xen_cpuid_leaf array;
+  msrs: xen_msr_entry array;
+}
+
+(* must match XEN_SYSCTL_cpu_policy* order in xen/include/public/sysctl.h *)
+type xen_cpu_policy_index = Cpu_policy_raw | Cpu_policy_host | 
Cpu_policy_pv_max | Cpu_policy_hvm_max | Cpu_policy_pv_default | 
Cpu_policy_hvm_default
+
+let string_of_xen_cpu_policy_index = function
+  | Cpu_policy_raw -> "Raw"
+  | Cpu_policy_host -> "Host"
+  | Cpu_policy_pv_max -> "PV Max"
+  | Cpu_policy_hvm_max -> "HVM Max"
+  | Cpu_policy_pv_default -> "PV default"
+  | Cpu_policy_hvm_default -> "HVM default"
+
+external get_system_cpu_policy: handle -> xen_cpu_policy_index -> 
xen_cpu_policy = "stub_xc_get_system_cpu_policy"
+
+external cpu_policy_to_featureset: handle -> xen_cpu_policy -> int64 array = 
"stub_xc_policy_to_featureset"
+
 external watchdog : handle -> int -> int32 -> int
   = "stub_xc_watchdog"
 
diff --git a/tools/ocaml/libs/xc/xenctrl.mli b/tools/ocaml/libs/xc/xenctrl.mli
index 6e94940a8a..605adeeec9 100644
--- a/tools/ocaml/libs/xc/xenctrl.mli
+++ b/tools/ocaml/libs/xc/xenctrl.mli
@@ -223,6 +223,77 @@ external version_capabilities : handle -> string
 type featureset_index = Featureset_raw | Featureset_host | Featureset_pv | 
Featureset_hvm
 external get_cpu_featureset : handle -> featureset_index -> int64 array = 
"stub_xc_get_cpu_featureset"
 
+(** CPUID takes a leaf (EAX) and optional subleaf (ECX) as input and
+returns feature information bitset in 4 registers (EAX, EBX, ECX, EDX).
+This record captures one such invocation of CPUID.
+
+CPU manuals contain tables explaining the available leaves/subleaves and 
feature bits:
+
+
https://software.intel.com/content/www/us/en/develop/articles/intel-sdm.html
+  Intel® 64 and IA-32 architectures software developer's  manual 
volume 2A: Instruction set reference
+  Chapter 3.2, Table 3-8
+
+https://developer.amd.com/resources/developer-guides-manuals/
+  AMD64 Architecture Programmer’s Manual Volume 3: General Purpose and 
System Instructions
+  Appendix D Instruction Subsets and CPUID Feature Flags
+ *)
+type xen_cpuid_leaf = {
+  leaf: int64; (** initial EAX value *)
+  subleaf: int64; (** initial ECX value *)
+  a: int64; (** EAX result *)
+  b: int64; (** EBX result *)
+  c: int64; (** ECX result *)
+  d: int64; (** EDX result *)
+}
+
+(** CPU Model Specific Registers control various aspects of CPU behaviour.
+
+RDMSR takes ECX as input and returns its result in EDX:EAX.
+This record captures one invocation of RDMSR.
+
+CPU manuals document the available MSRs and feature bits
+
+   
https://software.intel.com/content/www/us/en/develop/articles/intel-sdm.html
+ Intel® 64 and IA-32 architectur

[PATCH v2 14/17] tools/ocaml: safer Xenmmap interface

2021-05-11 Thread Edwin Török
Xenmmap.mmap_interface is created from multiple places:
* via mmap(), which needs to be unmap()-ed
* xc_map_foreign_range
* xengnttab_map_grant_ref

Signed-off-by: Edwin Török 
---
 tools/ocaml/libs/mmap/gnt.ml  | 14 --
 tools/ocaml/libs/mmap/gnt.mli |  3 ++-
 tools/ocaml/libs/mmap/xenmmap.ml  | 14 --
 tools/ocaml/libs/mmap/xenmmap.mli | 11 ---
 tools/ocaml/libs/xb/xb.ml | 10 +-
 tools/ocaml/libs/xb/xb.mli|  4 ++--
 tools/ocaml/libs/xc/xenctrl.ml|  6 --
 tools/ocaml/libs/xc/xenctrl.mli   |  5 ++---
 tools/ocaml/xenstored/domain.ml   |  2 +-
 9 files changed, 44 insertions(+), 25 deletions(-)

diff --git a/tools/ocaml/libs/mmap/gnt.ml b/tools/ocaml/libs/mmap/gnt.ml
index 65f0334b7c..bef2d3e850 100644
--- a/tools/ocaml/libs/mmap/gnt.ml
+++ b/tools/ocaml/libs/mmap/gnt.ml
@@ -45,16 +45,18 @@ module Gnttab = struct
 ref: gntref;
   }
 
+  external unmap_exn : interface -> Xenmmap.mmap_interface -> unit = 
"stub_gnttab_unmap"
+
+  external map_fresh_exn: interface -> gntref -> domid -> bool -> 
Xenmmap.mmap_interface = "stub_gnttab_map_fresh"
+
   module Local_mapping = struct
 type t = Xenmmap.mmap_interface
 
-let to_pages t = t
+let to_pages interface t =
+  Xenmmap.make t ~unmap:(unmap_exn interface)
   end
 
-  external unmap_exn : interface -> Local_mapping.t -> unit = 
"stub_gnttab_unmap"
-
-  external map_fresh_exn: interface -> gntref -> domid -> bool -> 
Local_mapping.t = "stub_gnttab_map_fresh"
-
   let map_exn interface grant writable =
-  map_fresh_exn interface grant.ref grant.domid writable
+map_fresh_exn interface grant.ref grant.domid writable
+
 end
diff --git a/tools/ocaml/libs/mmap/gnt.mli b/tools/ocaml/libs/mmap/gnt.mli
index 302e13b05d..13ab4c7ead 100644
--- a/tools/ocaml/libs/mmap/gnt.mli
+++ b/tools/ocaml/libs/mmap/gnt.mli
@@ -53,6 +53,7 @@ module Gnttab : sig
 ref: gntref;
 (** id which identifies the specific export in the foreign domain *)
   }
+
   (** A foreign domain must explicitly "grant" us memory and send us the
   "reference". The pair of (foreign domain id, reference) uniquely
   identifies the block of memory. This pair ("grant") is transmitted
@@ -63,7 +64,7 @@ module Gnttab : sig
 type t
 (** Abstract type representing a locally-mapped shared memory page *)
 
-val to_pages: t -> Xenmmap.mmap_interface
+val to_pages: interface -> t -> Xenmmap.t
   end
 
   val map_exn : interface -> grant -> bool -> Local_mapping.t
diff --git a/tools/ocaml/libs/mmap/xenmmap.ml b/tools/ocaml/libs/mmap/xenmmap.ml
index 44b67c89d2..af258942a0 100644
--- a/tools/ocaml/libs/mmap/xenmmap.ml
+++ b/tools/ocaml/libs/mmap/xenmmap.ml
@@ -15,17 +15,27 @@
  *)
 
 type mmap_interface
+type t = mmap_interface * (mmap_interface -> unit)
+
 
 type mmap_prot_flag = RDONLY | WRONLY | RDWR
 type mmap_map_flag = SHARED | PRIVATE
 
 (* mmap: fd -> prot_flag -> map_flag -> length -> offset -> interface *)
-external mmap: Unix.file_descr -> mmap_prot_flag -> mmap_map_flag
+external mmap': Unix.file_descr -> mmap_prot_flag -> mmap_map_flag
-> int -> int -> mmap_interface = "stub_mmap_init"
-external unmap: mmap_interface -> unit = "stub_mmap_final"
 (* read: interface -> start -> length -> data *)
 external read: mmap_interface -> int -> int -> string = "stub_mmap_read"
 (* write: interface -> data -> start -> length -> unit *)
 external write: mmap_interface -> string -> int -> int -> unit = 
"stub_mmap_write"
 (* getpagesize: unit -> size of page *)
+external unmap': mmap_interface -> unit = "stub_mmap_final"
+(* getpagesize: unit -> size of page *)
+let make ?(unmap=unmap') interface = interface, unmap
 external getpagesize: unit -> int = "stub_mmap_getpagesize"
+
+let to_interface (intf, _) = intf
+let mmap fd prot_flag map_flag length offset =
+   let map = mmap' fd prot_flag map_flag length offset in
+   make map ~unmap:unmap'
+let unmap (map, do_unmap) = do_unmap map
diff --git a/tools/ocaml/libs/mmap/xenmmap.mli 
b/tools/ocaml/libs/mmap/xenmmap.mli
index 8f92ed6310..075b24eab4 100644
--- a/tools/ocaml/libs/mmap/xenmmap.mli
+++ b/tools/ocaml/libs/mmap/xenmmap.mli
@@ -14,15 +14,20 @@
  * GNU Lesser General Public License for more details.
  *)
 
+type t
 type mmap_interface
 type mmap_prot_flag = RDONLY | WRONLY | RDWR
 type mmap_map_flag = SHARED | PRIVATE
 
-external mmap : Unix.file_descr -> mmap_prot_flag -> mmap_map_flag -> int -> 
int
- -> mmap_interface = "stub_mmap_init"
-external unmap : mmap_interface -> unit = "stub_mmap_final"
 external read : mmap_interface -> int -> int -> string = "stub_mmap_read

  1   2   >