[libvirt] [PATCH RFC 0/4]] nwfilter: don't reinstantiate filters if they are not changed

2018-10-17 Thread Nikolay Shirokovskiy
The series adds optimization in network filters instantiation as suggested
in [1]. Applied on top of [2].

However this approach has drawback I'm unfortunately discovered too late)
Next steps will left us with no network filters after this series applied:

systemctl stop libvirtd
systemctl restart firewalld
systemctl start libvirtd

In case of system update libvirt binaries ctime will change and filters will be
reinstalled.

[1] https://www.redhat.com/archives/libvir-list/2018-October/msg00657.html
[2] https://www.redhat.com/archives/libvir-list/2018-October/msg00787.html

Nikolay Shirokovskiy (4):
  nwfilter: add nwfilter hash
  nwfilter: don't reinstantiate filters if they are not changed
  nwfilter: force filters reinstantiation on firewalld reload
  nwfilter: force filters reinstantiation on binary update

 src/conf/virnwfilterbindingobj.c   |  40 +
 src/conf/virnwfilterbindingobj.h   |  10 +++
 src/conf/virnwfilterobj.c  | 145 +
 src/conf/virnwfilterobj.h  |   9 ++
 src/libvirt_private.syms   |   6 ++
 src/nwfilter/nwfilter_driver.c |  11 ++-
 src/nwfilter/nwfilter_gentech_driver.c |  67 +--
 src/nwfilter/nwfilter_gentech_driver.h |   6 +-
 8 files changed, 283 insertions(+), 11 deletions(-)

-- 
1.8.3.1

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


[libvirt] [PATCH 3/4] nwfilter: force filters reinstantiation on firewalld reload

2018-10-17 Thread Nikolay Shirokovskiy
We need reinstantiation because reload will flush rules installed
by libvirtd.

Signed-off-by: Nikolay Shirokovskiy 
---
 src/nwfilter/nwfilter_driver.c |  6 +++---
 src/nwfilter/nwfilter_gentech_driver.c | 13 +
 src/nwfilter/nwfilter_gentech_driver.h |  3 ++-
 3 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/src/nwfilter/nwfilter_driver.c b/src/nwfilter/nwfilter_driver.c
index 5d25d65..db04868 100644
--- a/src/nwfilter/nwfilter_driver.c
+++ b/src/nwfilter/nwfilter_driver.c
@@ -168,7 +168,7 @@ virNWFilterTriggerRebuildImpl(void *opaque)
 {
 virNWFilterDriverStatePtr nwdriver = opaque;
 
-return virNWFilterBuildAll(nwdriver, true);
+return virNWFilterBuildAll(nwdriver, true, false);
 }
 
 
@@ -264,7 +264,7 @@ nwfilterStateInitialize(bool privileged,
 if (virNWFilterBindingObjListLoadAllConfigs(driver->bindings, 
driver->bindingDir) < 0)
 goto error;
 
-if (virNWFilterBuildAll(driver, false) < 0)
+if (virNWFilterBuildAll(driver, false, false) < 0)
 goto error;
 
 nwfilterDriverUnlock();
@@ -319,7 +319,7 @@ nwfilterStateReload(void)
 
 virNWFilterUnlockFilterUpdates();
 
-virNWFilterBuildAll(driver, false);
+virNWFilterBuildAll(driver, false, true);
 
 nwfilterDriverUnlock();
 
diff --git a/src/nwfilter/nwfilter_gentech_driver.c 
b/src/nwfilter/nwfilter_gentech_driver.c
index 46b1144..a5b3e1a 100644
--- a/src/nwfilter/nwfilter_gentech_driver.c
+++ b/src/nwfilter/nwfilter_gentech_driver.c
@@ -984,7 +984,8 @@ static int
 virNWFilterBuildOne(virNWFilterDriverStatePtr driver,
 virNWFilterBindingObjPtr bindingobj,
 virHashTablePtr skipInterfaces,
-int step)
+int step,
+bool force)
 {
 virNWFilterBindingDefPtr binding = virNWFilterBindingObjGetDef(bindingobj);
 virNWFilterObjPtr filter;
@@ -1020,7 +1021,8 @@ virNWFilterBuildOne(virNWFilterDriverStatePtr driver,
 break;
 
 case STEP_APPLY_CURRENT:
-if ((filter = virNWFilterObjListFindByName(driver->nwfilters,
+if (!force &&
+(filter = virNWFilterObjListFindByName(driver->nwfilters,
binding->filter))) {
 char *filterhash = virNWFilterObjGetHash(filter);
 char *bindinghash = virNWFilterBindingObjGetFilterhash(bindingobj);
@@ -1055,6 +1057,7 @@ struct virNWFilterBuildData {
 virNWFilterDriverStatePtr driver;
 virHashTablePtr skipInterfaces;
 int step;
+bool force;
 };
 
 static int
@@ -1063,15 +1066,17 @@ virNWFilterBuildIter(virNWFilterBindingObjPtr binding, 
void *opaque)
 struct virNWFilterBuildData *data = opaque;
 
 return virNWFilterBuildOne(data->driver, binding,
-   data->skipInterfaces, data->step);
+   data->skipInterfaces, data->step, data->force);
 }
 
 int
 virNWFilterBuildAll(virNWFilterDriverStatePtr driver,
-bool newFilters)
+bool newFilters,
+bool force)
 {
 struct virNWFilterBuildData data = {
 .driver = driver,
+.force = force,
 };
 int ret = 0;
 
diff --git a/src/nwfilter/nwfilter_gentech_driver.h 
b/src/nwfilter/nwfilter_gentech_driver.h
index 3c96c34..bdf3daa 100644
--- a/src/nwfilter/nwfilter_gentech_driver.h
+++ b/src/nwfilter/nwfilter_gentech_driver.h
@@ -55,7 +55,8 @@ virHashTablePtr virNWFilterCreateVarHashmap(const char 
*macaddr,
 const virNWFilterVarValue *value);
 
 int virNWFilterBuildAll(virNWFilterDriverStatePtr driver,
-bool newFilters);
+bool newFilters,
+bool force);
 
 void virNWFilterBindingUpdateHash(virNWFilterObjListPtr nwfilters,
   virNWFilterBindingObjPtr binding);
-- 
1.8.3.1

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH] nwfilter: fix learning address thread shutdown

2018-10-17 Thread Nikolay Shirokovskiy



On 18.10.2018 03:26, John Ferlan wrote:
> 
> 
> On 10/17/18 4:25 AM, Nikolay Shirokovskiy wrote:
>>
>>
>> On 17.10.2018 01:28, John Ferlan wrote:
>>>
>>>
>>> On 10/12/18 3:23 AM, Nikolay Shirokovskiy wrote:
 If learning thread is configured to learn on all ethernet frames (which is
 hardcoded) then chances are big that there is packet on every iteration of
 inspecting frames loop. As result we will hang on shutdown because we don't
 check threadsTerminate if there is packet.

 Let's just check termination conditions on every iteration.

 Signed-off-by: Nikolay Shirokovskiy 
 ---
  src/nwfilter/nwfilter_learnipaddr.c | 22 +++---
  1 file changed, 7 insertions(+), 15 deletions(-)

 diff --git a/src/nwfilter/nwfilter_learnipaddr.c 
 b/src/nwfilter/nwfilter_learnipaddr.c
 index 008c24b..e6cb996 100644
 --- a/src/nwfilter/nwfilter_learnipaddr.c
 +++ b/src/nwfilter/nwfilter_learnipaddr.c
 @@ -483,6 +483,12 @@ learnIPAddressThread(void *arg)
  while (req->status == 0 && vmaddr == 0) {
  int n = poll(fds, ARRAY_CARDINALITY(fds), PKT_TIMEOUT_MS);
  
 +if (threadsTerminate || req->terminate) {
 +req->status = ECANCELED;
 +showError = false;
 +break;
 +}
 +
>>>
>>> So the theory is that regardless of whether there is a timeout or not,
>>> let's check for termination markers before checking poll call return
>>> status.  Which is fine; however, ...
>>>
  if (n < 0) {
  if (errno == EAGAIN || errno == EINTR)
  continue;
 @@ -492,15 +498,8 @@ learnIPAddressThread(void *arg)
  break;
  }
  
 -if (n == 0) {
 -if (threadsTerminate || req->terminate) {
 -VIR_DEBUG("Terminate request seen, cancelling pcap");
 -req->status = ECANCELED;
 -showError = false;
 -break;
 -}
 +if (n == 0)
  continue;
 -}
  
  if (fds[0].revents & (POLLHUP | POLLERR)) {
  VIR_DEBUG("Error from FD probably dev deleted");
 @@ -512,13 +511,6 @@ learnIPAddressThread(void *arg)
  packet = pcap_next(handle, &header);
  
  if (!packet) {
 -/* Already handled with poll, but lets be sure */
 -if (threadsTerminate || req->terminate) {
 -req->status = ECANCELED;
 -showError = false;
 -break;
 -}
 -
>>>
>>> Why remove this one? Is it possible termination was set after the poll
>>> and if fetching the packet fails, then if these are true let's get out
>>> before possibly continuing back to the poll (which I assume would
>>> timeout and fail).  Secondary question would be should this be separated
>>> from the other part?
>>
>> I guess pcap_next does not takes much time (as it does not wait for IO)
>> so there is a little chance things change after the above check. And
>> even if they are timeout is small and we terminate with little delay
>> right after poll.
>>
>> As to the second question I'm not sure why separation is useful.
>>
>> Nikolay
>>
> 
> OK - I left things as is, slightly edited the commit message w/r/t
> grammar stuff...
> 
> Reviewed-by: John Ferlan 
> (and pushed)
> 

Thanx!

Nikolay

> 
>>>
>>> Just need to ask - maybe the answer alters the commit message a little
>>> bit too.
>>>
>>> John
>>>
  /* Again, already handled above, but lets be sure */
  if (virNetDevValidateConfig(req->binding->portdevname, NULL, 
 req->ifindex) <= 0) {
  virResetLastError();


--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


[libvirt] [PATCH 4/4] nwfilter: force filters reinstantiation on binary update

2018-10-17 Thread Nikolay Shirokovskiy
This helps us bring correct firewall rules if previous binary
install them incorrectly.

Signed-off-by: Nikolay Shirokovskiy 
---
 src/conf/virnwfilterbindingobj.c   | 20 
 src/conf/virnwfilterbindingobj.h   |  3 +++
 src/libvirt_private.syms   |  1 +
 src/nwfilter/nwfilter_gentech_driver.c |  4 +++-
 4 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/src/conf/virnwfilterbindingobj.c b/src/conf/virnwfilterbindingobj.c
index 355981e..09b757a 100644
--- a/src/conf/virnwfilterbindingobj.c
+++ b/src/conf/virnwfilterbindingobj.c
@@ -37,6 +37,7 @@ struct _virNWFilterBindingObj {
 bool removing;
 virNWFilterBindingDefPtr def;
 char *filterhash;
+time_t libvirtCtime;
 };
 
 
@@ -110,6 +111,7 @@ virNWFilterBindingObjSetFilterhash(virNWFilterBindingObjPtr 
obj,
 {
 VIR_FREE(obj->filterhash);
 obj->filterhash = filterhash;
+obj->libvirtCtime = virGetSelfLastChanged();
 }
 
 
@@ -120,6 +122,12 @@ 
virNWFilterBindingObjGetFilterhash(virNWFilterBindingObjPtr obj)
 }
 
 
+time_t
+virNWFilterBindingObjGetLibvirtCtime(virNWFilterBindingObjPtr obj)
+{
+return obj->libvirtCtime;
+}
+
 /**
  * virNWFilterBindingObjEndAPI:
  * @obj: binding object
@@ -220,12 +228,22 @@ virNWFilterBindingObjParseXML(xmlDocPtr doc,
 {
 virNWFilterBindingObjPtr ret;
 xmlNodePtr node;
+long long int ctime;
 
 if (!(ret = virNWFilterBindingObjNew()))
 return NULL;
 
 ret->filterhash = virXPathString("string(./filterhash)", ctxt);
 
+if (virXPathBoolean("boolean(./libvirtctime)", ctxt) > 0) {
+if (virXPathLongLong("string(./libvirtctime)", ctxt, &ctime) < 0) {
+virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
+   _("invalid libvirtctime format"));
+goto cleanup;
+}
+ret->libvirtCtime = (time_t)ctime;
+}
+
 if (!(node = virXPathNode("./filterbinding", ctxt))) {
 virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("filter binding status missing content"));
@@ -304,6 +322,8 @@ virNWFilterBindingObjFormat(const virNWFilterBindingObj 
*obj)
 
 virBufferAdjustIndent(&buf, 2);
 virBufferAsprintf(&buf, "%s\n", obj->filterhash);
+virBufferAsprintf(&buf, "%llu\n",
+  (long long) obj->libvirtCtime);
 
 if (virNWFilterBindingDefFormatBuf(&buf, obj->def) < 0) {
 virBufferFreeAndReset(&buf);
diff --git a/src/conf/virnwfilterbindingobj.h b/src/conf/virnwfilterbindingobj.h
index fbcee03..ab949f8 100644
--- a/src/conf/virnwfilterbindingobj.h
+++ b/src/conf/virnwfilterbindingobj.h
@@ -52,6 +52,9 @@ virNWFilterBindingObjSetFilterhash(virNWFilterBindingObjPtr 
obj,
 char*
 virNWFilterBindingObjGetFilterhash(virNWFilterBindingObjPtr obj);
 
+time_t
+virNWFilterBindingObjGetLibvirtCtime(virNWFilterBindingObjPtr obj);
+
 void
 virNWFilterBindingObjEndAPI(virNWFilterBindingObjPtr *obj);
 
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index cc3aaba..368ee01 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -1058,6 +1058,7 @@ virNWFilterBindingObjEndAPI;
 virNWFilterBindingObjFormat;
 virNWFilterBindingObjGetDef;
 virNWFilterBindingObjGetFilterhash;
+virNWFilterBindingObjGetLibvirtCtime;
 virNWFilterBindingObjGetRemoving;
 virNWFilterBindingObjNew;
 virNWFilterBindingObjParseFile;
diff --git a/src/nwfilter/nwfilter_gentech_driver.c 
b/src/nwfilter/nwfilter_gentech_driver.c
index a5b3e1a..94c2c5b 100644
--- a/src/nwfilter/nwfilter_gentech_driver.c
+++ b/src/nwfilter/nwfilter_gentech_driver.c
@@ -1026,8 +1026,10 @@ virNWFilterBuildOne(virNWFilterDriverStatePtr driver,
binding->filter))) {
 char *filterhash = virNWFilterObjGetHash(filter);
 char *bindinghash = virNWFilterBindingObjGetFilterhash(bindingobj);
+time_t libvirtCtime = 
virNWFilterBindingObjGetLibvirtCtime(bindingobj);
 
-if (filterhash && bindinghash && STREQ(filterhash, bindinghash)) {
+if (libvirtCtime == virGetSelfLastChanged() &&
+filterhash && bindinghash && STREQ(filterhash, bindinghash)) {
 VIR_DEBUG("skip binding reinstantiating owner=%s 
portdevname=%s"
   " filter=%s",
   binding->ownername, binding->portdevname,
-- 
1.8.3.1

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


[libvirt] [PATCH 1/4] nwfilter: add nwfilter hash

2018-10-17 Thread Nikolay Shirokovskiy
For filter without references to other filters hash is just sha-256 of filter's
xml. For filters with references hash is sha-256 of string consisting of
filter's self hash and hashes of directly referenced filters.

If filter is not complete that is some of filters it's referencing directly or
indirectly are missing the hash is set to NULL as well as if there was OOM on
hash caculation. So having NULL hash is regular situation.

Signed-off-by: Nikolay Shirokovskiy 
---
 src/conf/virnwfilterobj.c  | 145 +
 src/conf/virnwfilterobj.h  |   9 ++
 src/libvirt_private.syms   |   3 +
 src/nwfilter/nwfilter_driver.c |   3 +
 src/nwfilter/nwfilter_gentech_driver.c |   2 +
 5 files changed, 162 insertions(+)

diff --git a/src/conf/virnwfilterobj.c b/src/conf/virnwfilterobj.c
index 0136a0d..4cc81df 100644
--- a/src/conf/virnwfilterobj.c
+++ b/src/conf/virnwfilterobj.c
@@ -28,6 +28,7 @@
 #include "virlog.h"
 #include "virnwfilterobj.h"
 #include "virstring.h"
+#include "vircrypto.h"
 
 #define VIR_FROM_THIS VIR_FROM_NWFILTER
 
@@ -40,6 +41,8 @@ struct _virNWFilterObj {
 
 virNWFilterDefPtr def;
 virNWFilterDefPtr newDef;
+
+char *hash;
 };
 
 struct _virNWFilterObjList {
@@ -82,6 +85,13 @@ virNWFilterObjGetNewDef(virNWFilterObjPtr obj)
 }
 
 
+char*
+virNWFilterObjGetHash(virNWFilterObjPtr obj)
+{
+return obj->hash;
+}
+
+
 bool
 virNWFilterObjWantRemoved(virNWFilterObjPtr obj)
 {
@@ -307,6 +317,139 @@ virNWFilterDefEqual(const virNWFilterDef *def1,
 }
 
 
+static void
+virNWFilterObjInvalidateHash(virNWFilterObjListPtr nwfilters,
+ virNWFilterObjPtr obj)
+{
+virNWFilterDefPtr def = obj->def;
+size_t i;
+
+if (!obj->hash)
+return;
+
+if (obj->newDef) {
+VIR_FREE(obj->hash);
+return;
+}
+
+for (i = 0; i < def->nentries; i++) {
+virNWFilterObjPtr child;
+char *filterref;
+
+if (def->filterEntries[i]->rule ||
+!def->filterEntries[i]->include)
+continue;
+
+filterref = def->filterEntries[i]->include->filterref;
+
+if (!(child = virNWFilterObjListFindByName(nwfilters, filterref))) {
+VIR_FREE(obj->hash);
+return;
+}
+
+virNWFilterObjInvalidateHash(nwfilters, child);
+
+if (!child->hash) {
+VIR_FREE(obj->hash);
+virNWFilterObjUnlock(child);
+return;
+}
+
+virNWFilterObjUnlock(child);
+}
+}
+
+
+void
+virNWFilterObjListInvalidateHash(virNWFilterObjListPtr nwfilters)
+{
+size_t i;
+virNWFilterObjPtr obj;
+
+for (i = 0; i < nwfilters->count; i++) {
+obj = nwfilters->objs[i];
+virNWFilterObjLock(obj);
+virNWFilterObjInvalidateHash(nwfilters, obj);
+virNWFilterObjUnlock(obj);
+}
+}
+
+
+static void
+virNWFilterObjUpdateHash(virNWFilterObjListPtr nwfilters,
+ virNWFilterObjPtr obj)
+{
+virNWFilterDefPtr def = obj->newDef ? obj->newDef : obj->def;
+size_t i;
+char *hash;
+char *xml;
+virBuffer buf = VIR_BUFFER_INITIALIZER;
+
+if (obj->hash)
+return;
+
+if (!(xml = virNWFilterDefFormat(def)))
+return;
+
+if (virCryptoHashString(VIR_CRYPTO_HASH_SHA256, xml, &hash))
+goto cleanup;
+
+virBufferAsprintf(&buf, "%s\n", hash);
+VIR_FREE(hash);
+
+for (i = 0; i < def->nentries; i++) {
+char *filterref;
+virNWFilterObjPtr child;
+
+if (def->filterEntries[i]->rule ||
+!def->filterEntries[i]->include)
+continue;
+
+filterref = def->filterEntries[i]->include->filterref;
+
+if (!(child = virNWFilterObjListFindByName(nwfilters, filterref)))
+goto cleanup;
+
+virNWFilterObjUpdateHash(nwfilters, child);
+
+if (!child->hash) {
+virNWFilterObjUnlock(child);
+goto cleanup;
+}
+
+virBufferAsprintf(&buf, "%s\n", child->hash);
+
+virNWFilterObjUnlock(child);
+}
+
+if (virBufferCheckError(&buf) < 0)
+goto cleanup;
+
+hash = virBufferContentAndReset(&buf);
+ignore_value(virCryptoHashString(VIR_CRYPTO_HASH_SHA256, hash, 
&obj->hash));
+VIR_FREE(hash);
+
+ cleanup:
+virBufferFreeAndReset(&buf);
+VIR_FREE(xml);
+}
+
+
+void
+virNWFilterObjListUpdateHash(virNWFilterObjListPtr nwfilters)
+{
+size_t i;
+virNWFilterObjPtr obj;
+
+for (i = 0; i < nwfilters->count; i++) {
+obj = nwfilters->objs[i];
+virNWFilterObjLock(obj);
+virNWFilterObjUpdateHash(nwfilters, obj);
+virNWFilterObjUnlock(obj);
+}
+}
+
+
 virNWFilterObjPtr
 virNWFilterObjListAssignDef(virNWFilterObjListPtr nwfilters,
 virNWFilterDefPtr def)
@@ -555,6 +698,8 @@ virNWFilterObjListLoadAllConfigs(virNWFilterObjListPtr 
nwfilters,
 virNWFilterObjUnlock(obj);
 }
 
+virNWF

[libvirt] [PATCH 2/4] nwfilter: don't reinstantiate filters if they are not changed

2018-10-17 Thread Nikolay Shirokovskiy
Skip binding's filter reinstantiation if it is not changed since it was
instantiated last time. The purpose it to fasten libvirtd restart at least if
filters won't changed, see RFC [1].  Thus we need to keep instantiated filter
hash for binding in binding's status.

This patch skips filters reinstantiation on firewalld reloads too but this will
be fixed in next patch.

[1] https://www.redhat.com/archives/libvir-list/2018-October/msg00657.html

Signed-off-by: Nikolay Shirokovskiy 
---
 src/conf/virnwfilterbindingobj.c   | 20 +
 src/conf/virnwfilterbindingobj.h   |  7 +
 src/libvirt_private.syms   |  2 ++
 src/nwfilter/nwfilter_driver.c |  2 ++
 src/nwfilter/nwfilter_gentech_driver.c | 52 +++---
 src/nwfilter/nwfilter_gentech_driver.h |  3 ++
 6 files changed, 82 insertions(+), 4 deletions(-)

diff --git a/src/conf/virnwfilterbindingobj.c b/src/conf/virnwfilterbindingobj.c
index d145fe32..355981e 100644
--- a/src/conf/virnwfilterbindingobj.c
+++ b/src/conf/virnwfilterbindingobj.c
@@ -36,6 +36,7 @@ struct _virNWFilterBindingObj {
 
 bool removing;
 virNWFilterBindingDefPtr def;
+char *filterhash;
 };
 
 
@@ -103,6 +104,22 @@ virNWFilterBindingObjSetRemoving(virNWFilterBindingObjPtr 
obj,
 }
 
 
+void
+virNWFilterBindingObjSetFilterhash(virNWFilterBindingObjPtr obj,
+   char *filterhash)
+{
+VIR_FREE(obj->filterhash);
+obj->filterhash = filterhash;
+}
+
+
+char*
+virNWFilterBindingObjGetFilterhash(virNWFilterBindingObjPtr obj)
+{
+return obj->filterhash;
+}
+
+
 /**
  * virNWFilterBindingObjEndAPI:
  * @obj: binding object
@@ -207,6 +224,8 @@ virNWFilterBindingObjParseXML(xmlDocPtr doc,
 if (!(ret = virNWFilterBindingObjNew()))
 return NULL;
 
+ret->filterhash = virXPathString("string(./filterhash)", ctxt);
+
 if (!(node = virXPathNode("./filterbinding", ctxt))) {
 virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
_("filter binding status missing content"));
@@ -284,6 +303,7 @@ virNWFilterBindingObjFormat(const virNWFilterBindingObj 
*obj)
 virBufferAddLit(&buf, "\n");
 
 virBufferAdjustIndent(&buf, 2);
+virBufferAsprintf(&buf, "%s\n", obj->filterhash);
 
 if (virNWFilterBindingDefFormatBuf(&buf, obj->def) < 0) {
 virBufferFreeAndReset(&buf);
diff --git a/src/conf/virnwfilterbindingobj.h b/src/conf/virnwfilterbindingobj.h
index 21ae85b..fbcee03 100644
--- a/src/conf/virnwfilterbindingobj.h
+++ b/src/conf/virnwfilterbindingobj.h
@@ -46,6 +46,13 @@ virNWFilterBindingObjSetRemoving(virNWFilterBindingObjPtr 
obj,
  bool removing);
 
 void
+virNWFilterBindingObjSetFilterhash(virNWFilterBindingObjPtr obj,
+   char *filterhash);
+
+char*
+virNWFilterBindingObjGetFilterhash(virNWFilterBindingObjPtr obj);
+
+void
 virNWFilterBindingObjEndAPI(virNWFilterBindingObjPtr *obj);
 
 char *
diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index a7cfe80..cc3aaba 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -1057,11 +1057,13 @@ virNWFilterBindingObjDelete;
 virNWFilterBindingObjEndAPI;
 virNWFilterBindingObjFormat;
 virNWFilterBindingObjGetDef;
+virNWFilterBindingObjGetFilterhash;
 virNWFilterBindingObjGetRemoving;
 virNWFilterBindingObjNew;
 virNWFilterBindingObjParseFile;
 virNWFilterBindingObjSave;
 virNWFilterBindingObjSetDef;
+virNWFilterBindingObjSetFilterhash;
 virNWFilterBindingObjSetRemoving;
 
 
diff --git a/src/nwfilter/nwfilter_driver.c b/src/nwfilter/nwfilter_driver.c
index 5591c0b..5d25d65 100644
--- a/src/nwfilter/nwfilter_driver.c
+++ b/src/nwfilter/nwfilter_driver.c
@@ -778,6 +778,8 @@ nwfilterBindingCreateXML(virConnectPtr conn,
 ret = NULL;
 goto cleanup;
 }
+
+virNWFilterBindingUpdateHash(driver->nwfilters, obj);
 virNWFilterBindingObjSave(obj, driver->bindingDir);
 
  cleanup:
diff --git a/src/nwfilter/nwfilter_gentech_driver.c 
b/src/nwfilter/nwfilter_gentech_driver.c
index d64621b..46b1144 100644
--- a/src/nwfilter/nwfilter_gentech_driver.c
+++ b/src/nwfilter/nwfilter_gentech_driver.c
@@ -982,10 +982,12 @@ enum {
 
 static int
 virNWFilterBuildOne(virNWFilterDriverStatePtr driver,
-virNWFilterBindingDefPtr binding,
+virNWFilterBindingObjPtr bindingobj,
 virHashTablePtr skipInterfaces,
 int step)
 {
+virNWFilterBindingDefPtr binding = virNWFilterBindingObjGetDef(bindingobj);
+virNWFilterObjPtr filter;
 bool skipIface;
 int ret = 0;
 VIR_DEBUG("Building filter for portdev=%s step=%d", binding->portdevname, 
step);
@@ -1009,13 +1011,39 @@ virNWFilterBuildOne(virNWFilterDriverStatePtr driver,
 break;
 
 case STEP_SWITCH:
-if (!virHashLookup(skipInterfaces, binding->portdevname))
+if (!virHashLookup(skipInterfaces, binding->portdevname)) {
 ret =

Re: [libvirt] [PATCH] conf: More clear error msg for incomplete coalesce xml

2018-10-17 Thread Han Han
On Wed, Oct 17, 2018 at 4:46 PM Martin Kletzander 
wrote:

> On Tue, Oct 16, 2018 at 07:19:41PM -0400, John Ferlan wrote:
> >
> >
> >On 10/14/18 10:26 AM, Han Han wrote:
> >> https://bugzilla.redhat.com/show_bug.cgi?id=1535930
> >>
> >> Report more clear err msg instead of unknown error when coalesce
> >> settings is incomplete.
> >>
>
> Incomplete is not an error.  It's request for removal of the missing
> setting.
> There is no problem if it is incomplete.
>
> Well, I think incomplete xml is the problem because I have reproduce it on
an inactive
VM as https://bugzilla.redhat.com/show_bug.cgi?id=1535930#c4 .

I am not sure it will be something wrong when update the device lively, but
we can fix
this first.

> If you look at the BZ the problem is not the incomplete XML, but the fact
> that
> the code that should update the device fails somewhere without setting an
> error.
> Actually, there should not be an error, it should work.  So this just works
> around the actual issue.
>
> Having said that maybe the problem is somewhere in the parsing part, but
> this is
> not the solution.  We need to "go deeper" to find out why the updating code
> fails and then figure out what to fix from there, not put a sheet over the
> problem.
>
> >> Signed-off-by: Han Han 
> >> ---
> >>  src/conf/domain_conf.c | 6 +-
> >>  1 file changed, 5 insertions(+), 1 deletion(-)
> >>
> >> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
> >> index 9911d56130..e755f45d3d 100644
> >> --- a/src/conf/domain_conf.c
> >> +++ b/src/conf/domain_conf.c
> >> @@ -7804,8 +7804,12 @@ virDomainNetDefCoalesceParseXML(xmlNodePtr node,
> >>  ctxt->node = node;
> >>
> >>  str = virXPathString("string(./rx/frames/@max)", ctxt);
> >> -if (!str)
> >> +if (!str) {
> >> +virReportError(VIR_ERR_XML_DETAIL,
> >> +   "%s",
> >
> >This can be put on the previous line
> >
> >> +   _("incomplete coalesce settings in interface
> xml"));
> >
> >and specifically this could be is missing rx frames max attributes
> >
> >However, according to the RNG from commit 523c9960, it seems the 'rx' is
> >optional as is the '@max' value.  Maybe Martin should provide a comment
> >on this series since he added it.
> >
> >Of course that would cause the whole  to disappear on Format.
> >It would also cause problems because def->coalesce would have something
> >that's empty.
> >
> >So perhaps the best thing to do is pass the @def into here, then only if
> >we get beyond the initial !str comparison do we allocate and fill it in;
> >otherwise, we return 0 if rx/frames/@max is not there.  Prepares us for
> >the future.
> >
> >I guess I'm not 100% clear if max frames == 0 what would happen. Maybe
> >Martin knows (I've CC'd him).
> >
>
> `rx-frames=0` turns that option off.  It's like not having the parameter
> there
> at all (it also works like this with ethtool).
>
Set  `rx-frames=0` is a better solution compared with report a error to
incomplete
the incomplete coalesce setting.

>
> >John
> >
> >>  goto cleanup;
> >> +}
> >>
> >>  if (VIR_ALLOC(ret) < 0)
> >>  goto cleanup;
> >>
>


-- 
Best regards,
---
Han Han
Quality Engineer
Redhat.

Email: h...@redhat.com
Phone: +861065339333
--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list

Re: [libvirt] [RFC 0/3] update NVDIMM support

2018-10-17 Thread Han Han
On Thu, Oct 18, 2018 at 11:07 AM Luyao Zhong  wrote:

> Hi Han,
>
> I'm not sure which release my patches will merge into. How about adding
> the patch to update the release news after my last version of these
> patches.  Waiting for more reviews and comments.
>
Well. That's OK.

> Regards,
> Luyao Zhong
>
> On 2018/10/18 上午9:10, Han Han wrote:
>
>
>
> On Wed, Oct 17, 2018 at 10:25 AM Luyao Zhong 
> wrote:
>
>> Hi libvirt experts,
>>
>> This is the RFC for updating NVDIMM support in libvirt.
>>
>> QEMU has supported four more properties which libvirt has not introduced
>> yet, including 'align', 'pmem', 'nvdimm-persistences' and 'unarmed'.
>>
>> The 'align' property allows users to specify the proper alignment. The
>> previous alignment can only be 4K because QEMU use pagesize as alignment.
>> But some backends may require alignments different from the pagesize.
>>
>> The 'pmem' property allows users to specify whether the backend storage of
>> memory-backend-file is a real persistent memory. Then QEMU will know if
>> it needs to guarrantee the write persistence to the vNVDIMM backend.
>>
>> The 'nvdimm-persistence' property allows users to set platform-supported
>> features about NVDIMM data persistence of a guest.
>>
>> The 'unarmed' property allows users to mark vNVDIMM read-only. Only the
>> device DAX on the real NVDIMM can guarantee the guest write persistence,
>> so it's suggested to set 'unarmed' option to 'on' and then vNVDIMM device
>> will be marked as read-only.
>>
>> Libvirt introduces 'alignsize', 'pmem', 'persistence' and 'unarmed' config
>> elements into xml corresponding to 'align', 'pmem', 'nvdimm-persistence'
>> and 'unarmed' properties in QEMU, and update xml parsing, formating and
>> qemu command-line generating process for NVDIMM.
>>
>> Thanks,
>> Zhong, Luyao
>>
>> Luyao Zhong (3):
>>   xml: introduce more config elements for NVDIMM memory
>>   xml: update xml parsing and formating about NVDIMM memory
>>   qemu: update qemu command-line generating for NVDIMM memory
>>
>>  docs/formatdomain.html.in  |  98
>> +++---
>>  docs/schemas/domaincommon.rng  |  31 +-
>>
> Please add a patch to update the release news in news.xml.
>
>>  src/conf/domain_conf.c | 115
>> +++--
>>  src/conf/domain_conf.h |  14 +++
>>  src/libvirt_private.syms   |   2 +
>>  src/qemu/qemu_command.c|  25 +
>>  .../memory-hotplug-nvdimm-align.args   |  31 ++
>>  .../memory-hotplug-nvdimm-align.xml|  58 +++
>>  .../memory-hotplug-nvdimm-persistence.args |  31 ++
>>  .../memory-hotplug-nvdimm-persistence.xml  |  58 +++
>>  .../memory-hotplug-nvdimm-pmem.args|  31 ++
>>  .../memory-hotplug-nvdimm-pmem.xml |  58 +++
>>  .../memory-hotplug-nvdimm-unarmed.args |  31 ++
>>  .../memory-hotplug-nvdimm-unarmed.xml  |  58 +++
>>  tests/qemuxml2argvtest.c   |  12 +++
>>  .../memory-hotplug-nvdimm-align.xml|   1 +
>>  .../memory-hotplug-nvdimm-persistence.xml  |   1 +
>>  .../memory-hotplug-nvdimm-pmem.xml |   1 +
>>  .../memory-hotplug-nvdimm-unarmed.xml  |   1 +
>>  tests/qemuxml2xmltest.c|   4 +
>>  20 files changed, 636 insertions(+), 25 deletions(-)
>>  create mode 100644
>> tests/qemuxml2argvdata/memory-hotplug-nvdimm-align.args
>>  create mode 100644 tests/qemuxml2argvdata/memory-hotplug-nvdimm-align.xml
>>  create mode 100644
>> tests/qemuxml2argvdata/memory-hotplug-nvdimm-persistence.args
>>  create mode 100644
>> tests/qemuxml2argvdata/memory-hotplug-nvdimm-persistence.xml
>>  create mode 100644 tests/qemuxml2argvdata/memory-hotplug-nvdimm-pmem.args
>>  create mode 100644 tests/qemuxml2argvdata/memory-hotplug-nvdimm-pmem.xml
>>  create mode 100644
>> tests/qemuxml2argvdata/memory-hotplug-nvdimm-unarmed.args
>>  create mode 100644
>> tests/qemuxml2argvdata/memory-hotplug-nvdimm-unarmed.xml
>>  create mode 12
>> tests/qemuxml2xmloutdata/memory-hotplug-nvdimm-align.xml
>>  create mode 12
>> tests/qemuxml2xmloutdata/memory-hotplug-nvdimm-persistence.xml
>>  create mode 12
>> tests/qemuxml2xmloutdata/memory-hotplug-nvdimm-pmem.xml
>>  create mode 12
>> tests/qemuxml2xmloutdata/memory-hotplug-nvdimm-unarmed.xml
>>
>> --
>> 2.7.4
>>
>> --
>> libvir-list mailing list
>> libvir-list@redhat.com
>> https://www.redhat.com/mailman/listinfo/libvir-list
>>
>
>
> --
> Best regards,
> ---
> Han Han
> Quality Engineer
> Redhat.
>
> Email: h...@redhat.com
> Phone: +861065339333
>
> --
> libvir-list mailing 
> listlibvir-list@redhat.comhttps://www.redhat.com/mailman/listinfo/libvir-list
>
>

-- 
Best regards,
---
Han Han
Quality En

Re: [libvirt] [RFC 0/3] update NVDIMM support

2018-10-17 Thread Luyao Zhong

Hi Han,

I'm not sure which release my patches will merge into. How about adding 
the patch to update the release news after my last version of these 
patches.  Waiting for more reviews and comments.


Regards,
Luyao Zhong

On 2018/10/18 上午9:10, Han Han wrote:



On Wed, Oct 17, 2018 at 10:25 AM Luyao Zhong > wrote:


Hi libvirt experts,

This is the RFC for updating NVDIMM support in libvirt.

QEMU has supported four more properties which libvirt has not
introduced
yet, including 'align', 'pmem', 'nvdimm-persistences' and 'unarmed'.

The 'align' property allows users to specify the proper alignment. The
previous alignment can only be 4K because QEMU use pagesize as
alignment.
But some backends may require alignments different from the pagesize.

The 'pmem' property allows users to specify whether the backend
storage of
memory-backend-file is a real persistent memory. Then QEMU will
know if
it needs to guarrantee the write persistence to the vNVDIMM backend.

The 'nvdimm-persistence' property allows users to set
platform-supported
features about NVDIMM data persistence of a guest.

The 'unarmed' property allows users to mark vNVDIMM read-only.
Only the
device DAX on the real NVDIMM can guarantee the guest write
persistence,
so it's suggested to set 'unarmed' option to 'on' and then vNVDIMM
device
will be marked as read-only.

Libvirt introduces 'alignsize', 'pmem', 'persistence' and
'unarmed' config
elements into xml corresponding to 'align', 'pmem',
'nvdimm-persistence'
and 'unarmed' properties in QEMU, and update xml parsing,
formating and
qemu command-line generating process for NVDIMM.

Thanks,
Zhong, Luyao

Luyao Zhong (3):
  xml: introduce more config elements for NVDIMM memory
  xml: update xml parsing and formating about NVDIMM memory
  qemu: update qemu command-line generating for NVDIMM memory

 docs/formatdomain.html.in           
              |  98 +++---
 docs/schemas/domaincommon.rng                      |  31 +-

Please add a patch to update the release news in news.xml.

 src/conf/domain_conf.c                             | 115
+++--
 src/conf/domain_conf.h                             |  14 +++
 src/libvirt_private.syms                           |   2 +
 src/qemu/qemu_command.c                            |  25 +
 .../memory-hotplug-nvdimm-align.args               |  31 ++
 .../memory-hotplug-nvdimm-align.xml                |  58 +++
 .../memory-hotplug-nvdimm-persistence.args         |  31 ++
 .../memory-hotplug-nvdimm-persistence.xml          |  58 +++
 .../memory-hotplug-nvdimm-pmem.args                |  31 ++
 .../memory-hotplug-nvdimm-pmem.xml                 |  58 +++
 .../memory-hotplug-nvdimm-unarmed.args             |  31 ++
 .../memory-hotplug-nvdimm-unarmed.xml              |  58 +++
 tests/qemuxml2argvtest.c                           |  12 +++
 .../memory-hotplug-nvdimm-align.xml                |   1 +
 .../memory-hotplug-nvdimm-persistence.xml          |   1 +
 .../memory-hotplug-nvdimm-pmem.xml                 |   1 +
 .../memory-hotplug-nvdimm-unarmed.xml              |   1 +
 tests/qemuxml2xmltest.c                            |   4 +
 20 files changed, 636 insertions(+), 25 deletions(-)
 create mode 100644
tests/qemuxml2argvdata/memory-hotplug-nvdimm-align.args
 create mode 100644
tests/qemuxml2argvdata/memory-hotplug-nvdimm-align.xml
 create mode 100644
tests/qemuxml2argvdata/memory-hotplug-nvdimm-persistence.args
 create mode 100644
tests/qemuxml2argvdata/memory-hotplug-nvdimm-persistence.xml
 create mode 100644
tests/qemuxml2argvdata/memory-hotplug-nvdimm-pmem.args
 create mode 100644
tests/qemuxml2argvdata/memory-hotplug-nvdimm-pmem.xml
 create mode 100644
tests/qemuxml2argvdata/memory-hotplug-nvdimm-unarmed.args
 create mode 100644
tests/qemuxml2argvdata/memory-hotplug-nvdimm-unarmed.xml
 create mode 12
tests/qemuxml2xmloutdata/memory-hotplug-nvdimm-align.xml
 create mode 12
tests/qemuxml2xmloutdata/memory-hotplug-nvdimm-persistence.xml
 create mode 12
tests/qemuxml2xmloutdata/memory-hotplug-nvdimm-pmem.xml
 create mode 12
tests/qemuxml2xmloutdata/memory-hotplug-nvdimm-unarmed.xml

-- 
2.7.4


--
libvir-list mailing list
libvir-list@redhat.com 
https://www.redhat.com/mailman/listinfo/libvir-list



--
Best regards,
---
Han Han
Quality Engineer
Redhat.

Email: h...@redhat.com 
Phone: +861065339333

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mai

[libvirt] [PATCH v2] snapshot: Don't hose list on deletion failure

2018-10-17 Thread Eric Blake
If qemuDomainSnapshotDiscard() fails for any reason (rare,
but possible with an ill-timed ENOMEM or if
qemuDomainSnapshotForEachQcow2() has problems talking to the
qemu guest monitor), then an attempt to retry the snapshot
deletion API will crash because we didn't undo the effects
of virDomainSnapshotDropParent() temporarily rearranging the
internal list structures, and the second attempt to drop
parents will dereference NULL.  Fix it by instead noting that
there are only two callers to qemuDomainSnapshotDiscard(),
and only one of the two callers wants the parent to be updated;
thus we can move the call to virDomainSnapshotDropParent()
into a code path that only gets executed on success.

Signed-off-by: Eric Blake 

---
v2: avoid use-after-free
---
 src/qemu/qemu_domain.c | 6 --
 src/qemu/qemu_driver.c | 1 -
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index f00f1b3fdb..dd67be5e2a 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -8246,7 +8246,7 @@ int
 qemuDomainSnapshotDiscard(virQEMUDriverPtr driver,
   virDomainObjPtr vm,
   virDomainSnapshotObjPtr snap,
-  bool update_current,
+  bool update_parent,
   bool metadata_only)
 {
 char *snapFile = NULL;
@@ -8275,7 +8275,7 @@ qemuDomainSnapshotDiscard(virQEMUDriverPtr driver,
 goto cleanup;

 if (snap == vm->current_snapshot) {
-if (update_current && snap->def->parent) {
+if (update_parent && snap->def->parent) {
 parentsnap = virDomainSnapshotFindByName(vm->snapshots,
  snap->def->parent);
 if (!parentsnap) {
@@ -8298,6 +8298,8 @@ qemuDomainSnapshotDiscard(virQEMUDriverPtr driver,

 if (unlink(snapFile) < 0)
 VIR_WARN("Failed to unlink %s", snapFile);
+if (update_parent)
+virDomainSnapshotDropParent(snap);
 virDomainSnapshotObjListRemove(vm->snapshots, snap);

 ret = 0;
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index a52e2495d5..9f71641dfa 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -16526,7 +16526,6 @@ qemuDomainSnapshotDelete(virDomainSnapshotPtr snapshot,
 snap->first_child = NULL;
 ret = 0;
 } else {
-virDomainSnapshotDropParent(snap);
 ret = qemuDomainSnapshotDiscard(driver, vm, snap, true, metadata_only);
 }

-- 
2.17.2

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [RFC 0/3] update NVDIMM support

2018-10-17 Thread Han Han
On Wed, Oct 17, 2018 at 10:25 AM Luyao Zhong  wrote:

> Hi libvirt experts,
>
> This is the RFC for updating NVDIMM support in libvirt.
>
> QEMU has supported four more properties which libvirt has not introduced
> yet, including 'align', 'pmem', 'nvdimm-persistences' and 'unarmed'.
>
> The 'align' property allows users to specify the proper alignment. The
> previous alignment can only be 4K because QEMU use pagesize as alignment.
> But some backends may require alignments different from the pagesize.
>
> The 'pmem' property allows users to specify whether the backend storage of
> memory-backend-file is a real persistent memory. Then QEMU will know if
> it needs to guarrantee the write persistence to the vNVDIMM backend.
>
> The 'nvdimm-persistence' property allows users to set platform-supported
> features about NVDIMM data persistence of a guest.
>
> The 'unarmed' property allows users to mark vNVDIMM read-only. Only the
> device DAX on the real NVDIMM can guarantee the guest write persistence,
> so it's suggested to set 'unarmed' option to 'on' and then vNVDIMM device
> will be marked as read-only.
>
> Libvirt introduces 'alignsize', 'pmem', 'persistence' and 'unarmed' config
> elements into xml corresponding to 'align', 'pmem', 'nvdimm-persistence'
> and 'unarmed' properties in QEMU, and update xml parsing, formating and
> qemu command-line generating process for NVDIMM.
>
> Thanks,
> Zhong, Luyao
>
> Luyao Zhong (3):
>   xml: introduce more config elements for NVDIMM memory
>   xml: update xml parsing and formating about NVDIMM memory
>   qemu: update qemu command-line generating for NVDIMM memory
>
>  docs/formatdomain.html.in  |  98
> +++---
>  docs/schemas/domaincommon.rng  |  31 +-
>
Please add a patch to update the release news in news.xml.

>  src/conf/domain_conf.c | 115
> +++--
>  src/conf/domain_conf.h |  14 +++
>  src/libvirt_private.syms   |   2 +
>  src/qemu/qemu_command.c|  25 +
>  .../memory-hotplug-nvdimm-align.args   |  31 ++
>  .../memory-hotplug-nvdimm-align.xml|  58 +++
>  .../memory-hotplug-nvdimm-persistence.args |  31 ++
>  .../memory-hotplug-nvdimm-persistence.xml  |  58 +++
>  .../memory-hotplug-nvdimm-pmem.args|  31 ++
>  .../memory-hotplug-nvdimm-pmem.xml |  58 +++
>  .../memory-hotplug-nvdimm-unarmed.args |  31 ++
>  .../memory-hotplug-nvdimm-unarmed.xml  |  58 +++
>  tests/qemuxml2argvtest.c   |  12 +++
>  .../memory-hotplug-nvdimm-align.xml|   1 +
>  .../memory-hotplug-nvdimm-persistence.xml  |   1 +
>  .../memory-hotplug-nvdimm-pmem.xml |   1 +
>  .../memory-hotplug-nvdimm-unarmed.xml  |   1 +
>  tests/qemuxml2xmltest.c|   4 +
>  20 files changed, 636 insertions(+), 25 deletions(-)
>  create mode 100644 tests/qemuxml2argvdata/memory-hotplug-nvdimm-align.args
>  create mode 100644 tests/qemuxml2argvdata/memory-hotplug-nvdimm-align.xml
>  create mode 100644
> tests/qemuxml2argvdata/memory-hotplug-nvdimm-persistence.args
>  create mode 100644
> tests/qemuxml2argvdata/memory-hotplug-nvdimm-persistence.xml
>  create mode 100644 tests/qemuxml2argvdata/memory-hotplug-nvdimm-pmem.args
>  create mode 100644 tests/qemuxml2argvdata/memory-hotplug-nvdimm-pmem.xml
>  create mode 100644
> tests/qemuxml2argvdata/memory-hotplug-nvdimm-unarmed.args
>  create mode 100644
> tests/qemuxml2argvdata/memory-hotplug-nvdimm-unarmed.xml
>  create mode 12
> tests/qemuxml2xmloutdata/memory-hotplug-nvdimm-align.xml
>  create mode 12
> tests/qemuxml2xmloutdata/memory-hotplug-nvdimm-persistence.xml
>  create mode 12 tests/qemuxml2xmloutdata/memory-hotplug-nvdimm-pmem.xml
>  create mode 12
> tests/qemuxml2xmloutdata/memory-hotplug-nvdimm-unarmed.xml
>
> --
> 2.7.4
>
> --
> libvir-list mailing list
> libvir-list@redhat.com
> https://www.redhat.com/mailman/listinfo/libvir-list
>


-- 
Best regards,
---
Han Han
Quality Engineer
Redhat.

Email: h...@redhat.com
Phone: +861065339333
--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list

Re: [libvirt] [PATCH] snapshot: Don't hose list on deletion failure

2018-10-17 Thread Eric Blake

On 10/17/18 7:30 PM, Eric Blake wrote:

If qemuDomainSnapshotDiscard() fails for any reason (rare,
but possible with an ill-timed ENOMEM or if
qemuDomainSnapshotForEachQcow2() has problems talking to the
qemu guest monitor), then an attempt to retry the snapshot
deletion API will crash because we didn't undo the effects
of virDomainSnapshotDropParent() temporarily rearranging the
internal list structures, and the second attempt to drop
parents will dereference NULL.  Fix it by instead only
rearranging the internal list on success.

Signed-off-by: Eric Blake 
---

Found by accident, since I copied this code to my checkpoint
code and hit the segfault there.  This one has been latent
for years, but is not a security bug since there is no escalation
if you already have sufficient privilege to delete snapshots.

  src/qemu/qemu_driver.c | 3 ++-
  1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index 3f143e91c1..9a3f2a090d 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -16699,8 +16699,9 @@ qemuDomainSnapshotDelete(virDomainSnapshotPtr snapshot,
  snap->first_child = NULL;
  ret = 0;
  } else {
-virDomainSnapshotDropParent(snap);
  ret = qemuDomainSnapshotDiscard(driver, vm, snap, true, 
metadata_only);
+if (ret == 0)
+virDomainSnapshotDropParent(snap);


NACK; this creates a use-after-free scenario, since snap is freed by 
qemuDomainSnapshotDiscard on success.  I'll have to come up with 
something different.


--
Eric Blake, Principal Software Engineer
Red Hat, Inc.   +1-919-301-3266
Virtualization:  qemu.org | libvirt.org

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH] network: add prefix to dhcp range of dnsmasq conf file for IPv4 too

2018-10-17 Thread Laine Stump
Self-NACK. I'vebeen informed by the original reporter that for IPv4 we
have to specify a netmask rather than prefix (it worked for me with
prefix, but for him it didn't :-/)

I'll send a V2 in the morning. Too close to bedtime now to do anything
that requires attention to detail...

On 10/17/2018 12:06 PM, Laine Stump wrote:
> dnsmasq documentation says that the *IPv4* prefix/network
> address/broadcast address sent to dhcp clients will be automatically
> determined by dnsmasq by looking at the interface it's listening on,
> so the original libvirt code that added dhcp support to virtual
> networks did not add a prefix to the dnsmasq commandline (or later,
> the dnsmasq conf file).
>
> For *IPv6* however, dnsmasq cannot automatically determine the prefix,
> so it must be explicitly provided in the conf file (as a part of the
> dhcp-range option). Years after the initial IPv4 support, when IPv6
> dhcp support was added, libvirt added the prefix to dhcp-range, but
> only for IPv6 (following the "if it ain't broke, don't fix it"
> doctrine).
>
> Recently a user reported (privately, because they suspected a possible
> security implication, which turned out to be unfounded) a bug on a
> host where one of the interfaces was a superset of the libvirt network
> where dhcp is needed (e.g., the host's ethernet is 10.0.0.20/8, and
> the libvirt network is 10.10.0.1/24). For some reason dnsmasq was
> supplying the netmask/broadcast address for the /8 network to clients
> requesting an address on the /24 interface.
>
> This seems like a bug in dnsmasq, but even if/when it gets fixed
> there, it looks like there is no harm in just adding the prefix to all
> dhcp-range options regardless of IPv4 vs IPv6, so that's what this
> patch does.
>
> Signed-off-by: Laine Stump 
> ---
>  src/network/bridge_driver.c   | 7 ++-
>  tests/networkxml2confdata/dhcp6-nat-network.conf  | 2 +-
>  tests/networkxml2confdata/isolated-network.conf   | 2 +-
>  tests/networkxml2confdata/nat-network-dns-srv-record-minimal.conf | 2 +-
>  tests/networkxml2confdata/nat-network-dns-srv-record.conf | 2 +-
>  tests/networkxml2confdata/nat-network-dns-txt-record.conf | 2 +-
>  tests/networkxml2confdata/nat-network-name-with-quotes.conf   | 2 +-
>  tests/networkxml2confdata/nat-network.conf| 2 +-
>  tests/networkxml2confdata/netboot-network.conf| 2 +-
>  tests/networkxml2confdata/netboot-proxy-network.conf  | 2 +-
>  tests/networkxml2confdata/ptr-domains-auto.conf   | 2 +-
>  11 files changed, 12 insertions(+), 15 deletions(-)
>
> diff --git a/src/network/bridge_driver.c b/src/network/bridge_driver.c
> index 4bbc4f5a6d..7f5ff79fdc 100644
> --- a/src/network/bridge_driver.c
> +++ b/src/network/bridge_driver.c
> @@ -1416,11 +1416,8 @@ networkDnsmasqConfContents(virNetworkObjPtr obj,
>  !(eaddr = virSocketAddrFormat(&ipdef->ranges[r].end)))
>  goto cleanup;
>  
> -virBufferAsprintf(&configbuf, "dhcp-range=%s,%s",
> -  saddr, eaddr);
> -if (VIR_SOCKET_ADDR_IS_FAMILY(&ipdef->address, AF_INET6))
> -virBufferAsprintf(&configbuf, ",%d", prefix);
> -virBufferAddLit(&configbuf, "\n");
> +virBufferAsprintf(&configbuf, "dhcp-range=%s,%s,%d\n",
> +  saddr, eaddr, prefix);
>  
>  VIR_FREE(saddr);
>  VIR_FREE(eaddr);
> diff --git a/tests/networkxml2confdata/dhcp6-nat-network.conf 
> b/tests/networkxml2confdata/dhcp6-nat-network.conf
> index d1058df3b6..e1e110fe23 100644
> --- a/tests/networkxml2confdata/dhcp6-nat-network.conf
> +++ b/tests/networkxml2confdata/dhcp6-nat-network.conf
> @@ -8,7 +8,7 @@ strict-order
>  except-interface=lo
>  bind-dynamic
>  interface=virbr0
> -dhcp-range=192.168.122.2,192.168.122.254
> +dhcp-range=192.168.122.2,192.168.122.254,24
>  dhcp-no-override
>  dhcp-authoritative
>  dhcp-range=2001:db8:ac10:fd01::1:10,2001:db8:ac10:fd01::1:ff,64
> diff --git a/tests/networkxml2confdata/isolated-network.conf 
> b/tests/networkxml2confdata/isolated-network.conf
> index ce4a59f6c1..d182f42f0a 100644
> --- a/tests/networkxml2confdata/isolated-network.conf
> +++ b/tests/networkxml2confdata/isolated-network.conf
> @@ -10,7 +10,7 @@ bind-interfaces
>  listen-address=192.168.152.1
>  dhcp-option=3
>  no-resolv
> -dhcp-range=192.168.152.2,192.168.152.254
> +dhcp-range=192.168.152.2,192.168.152.254,24
>  dhcp-no-override
>  dhcp-authoritative
>  dhcp-lease-max=253
> diff --git 
> a/tests/networkxml2confdata/nat-network-dns-srv-record-minimal.conf 
> b/tests/networkxml2confdata/nat-network-dns-srv-record-minimal.conf
> index f35ea1d5d4..678e4a4bfd 100644
> --- a/tests/networkxml2confdata/nat-network-dns-srv-record-minimal.conf
> +++ b/tests/networkxml2confdata/nat-network-dns-srv-record-minimal.conf
> @@ -13,7 +13,7 @@ listen-

[libvirt] [PATCH] snapshot: Don't hose list on deletion failure

2018-10-17 Thread Eric Blake
If qemuDomainSnapshotDiscard() fails for any reason (rare,
but possible with an ill-timed ENOMEM or if
qemuDomainSnapshotForEachQcow2() has problems talking to the
qemu guest monitor), then an attempt to retry the snapshot
deletion API will crash because we didn't undo the effects
of virDomainSnapshotDropParent() temporarily rearranging the
internal list structures, and the second attempt to drop
parents will dereference NULL.  Fix it by instead only
rearranging the internal list on success.

Signed-off-by: Eric Blake 
---

Found by accident, since I copied this code to my checkpoint
code and hit the segfault there.  This one has been latent
for years, but is not a security bug since there is no escalation
if you already have sufficient privilege to delete snapshots.

 src/qemu/qemu_driver.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index 3f143e91c1..9a3f2a090d 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -16699,8 +16699,9 @@ qemuDomainSnapshotDelete(virDomainSnapshotPtr snapshot,
 snap->first_child = NULL;
 ret = 0;
 } else {
-virDomainSnapshotDropParent(snap);
 ret = qemuDomainSnapshotDiscard(driver, vm, snap, true, metadata_only);
+if (ret == 0)
+virDomainSnapshotDropParent(snap);
 }

  endjob:
-- 
2.17.2

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH] nwfilter: fix learning address thread shutdown

2018-10-17 Thread John Ferlan



On 10/17/18 4:25 AM, Nikolay Shirokovskiy wrote:
> 
> 
> On 17.10.2018 01:28, John Ferlan wrote:
>>
>>
>> On 10/12/18 3:23 AM, Nikolay Shirokovskiy wrote:
>>> If learning thread is configured to learn on all ethernet frames (which is
>>> hardcoded) then chances are big that there is packet on every iteration of
>>> inspecting frames loop. As result we will hang on shutdown because we don't
>>> check threadsTerminate if there is packet.
>>>
>>> Let's just check termination conditions on every iteration.
>>>
>>> Signed-off-by: Nikolay Shirokovskiy 
>>> ---
>>>  src/nwfilter/nwfilter_learnipaddr.c | 22 +++---
>>>  1 file changed, 7 insertions(+), 15 deletions(-)
>>>
>>> diff --git a/src/nwfilter/nwfilter_learnipaddr.c 
>>> b/src/nwfilter/nwfilter_learnipaddr.c
>>> index 008c24b..e6cb996 100644
>>> --- a/src/nwfilter/nwfilter_learnipaddr.c
>>> +++ b/src/nwfilter/nwfilter_learnipaddr.c
>>> @@ -483,6 +483,12 @@ learnIPAddressThread(void *arg)
>>>  while (req->status == 0 && vmaddr == 0) {
>>>  int n = poll(fds, ARRAY_CARDINALITY(fds), PKT_TIMEOUT_MS);
>>>  
>>> +if (threadsTerminate || req->terminate) {
>>> +req->status = ECANCELED;
>>> +showError = false;
>>> +break;
>>> +}
>>> +
>>
>> So the theory is that regardless of whether there is a timeout or not,
>> let's check for termination markers before checking poll call return
>> status.  Which is fine; however, ...
>>
>>>  if (n < 0) {
>>>  if (errno == EAGAIN || errno == EINTR)
>>>  continue;
>>> @@ -492,15 +498,8 @@ learnIPAddressThread(void *arg)
>>>  break;
>>>  }
>>>  
>>> -if (n == 0) {
>>> -if (threadsTerminate || req->terminate) {
>>> -VIR_DEBUG("Terminate request seen, cancelling pcap");
>>> -req->status = ECANCELED;
>>> -showError = false;
>>> -break;
>>> -}
>>> +if (n == 0)
>>>  continue;
>>> -}
>>>  
>>>  if (fds[0].revents & (POLLHUP | POLLERR)) {
>>>  VIR_DEBUG("Error from FD probably dev deleted");
>>> @@ -512,13 +511,6 @@ learnIPAddressThread(void *arg)
>>>  packet = pcap_next(handle, &header);
>>>  
>>>  if (!packet) {
>>> -/* Already handled with poll, but lets be sure */
>>> -if (threadsTerminate || req->terminate) {
>>> -req->status = ECANCELED;
>>> -showError = false;
>>> -break;
>>> -}
>>> -
>>
>> Why remove this one? Is it possible termination was set after the poll
>> and if fetching the packet fails, then if these are true let's get out
>> before possibly continuing back to the poll (which I assume would
>> timeout and fail).  Secondary question would be should this be separated
>> from the other part?
> 
> I guess pcap_next does not takes much time (as it does not wait for IO)
> so there is a little chance things change after the above check. And
> even if they are timeout is small and we terminate with little delay
> right after poll.
> 
> As to the second question I'm not sure why separation is useful.
> 
> Nikolay
> 

OK - I left things as is, slightly edited the commit message w/r/t
grammar stuff...

Reviewed-by: John Ferlan 
(and pushed)

John

>>
>> Just need to ask - maybe the answer alters the commit message a little
>> bit too.
>>
>> John
>>
>>>  /* Again, already handled above, but lets be sure */
>>>  if (virNetDevValidateConfig(req->binding->portdevname, NULL, 
>>> req->ifindex) <= 0) {
>>>  virResetLastError();
>>>

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH RFC v2] qemu: fix deadlock when waiting in non async jobs

2018-10-17 Thread John Ferlan



On 10/16/18 3:22 AM, Nikolay Shirokovskiy wrote:
> 
> 
> On 16.10.2018 03:00, John Ferlan wrote:
>>
>>
>> On 10/8/18 4:10 AM, Nikolay Shirokovskiy wrote:
>>> Block job abort operation can not handle properly qemu crashes when waiting 
>>> for
>>> abort/pivot completion. Deadlock scenario is next:
>>>
>>> - qemuDomainBlockJobAbort waits for pivot/abort completion
>>> - qemu crashes, then qemuProcessBeginStopJob broadcasts for VM condition and
>>>   then waits for job condition (taken by qemuDomainBlockJobAbort)
>>> - qemuDomainBlockJobAbort awakes but nothing really changed, VM is still
>>>   active (vm->def->id != -1) so thread starts waiting for completion again.
>>>   Now two threads are in deadlock.
>>>
>>> First let's remove broadcast in qemuProcessBeginStopJob. It is simply wrong
>>> because it is not set any condition before broadcast so that awaked threads 
>>> can
>>> not detect any changes. Crashing domain during async job will continue to be
>>> handled properly because destroy job can run concurrently with async job and
>>> destroy job calls qemuProcessStop which sets vm->def->id to -1 and 
>>> broadcasts.
>>
>> Hmm... Although blockjobs are not my area of expertise, I do seem to
>> have a knack for reading and commenting on patches with these edge
>> conditions.
>>
>> At first, taken alone this made it seem like separate patches are
>> required, but maybe not depending on the relationship described above.
>> As an aside, for this paragraph hunk you could call out commit 4d0c535a3
>> where this is/was introduced. Beyond the refactor, the broadcast was
>> added; however, it seems it was done so on purpose since the broadcast
>> would seemingly allowing something to be awoken.
>>
>> Beyond that - take away the scenario you describing where QEMU crashes.
>> In the normal path, if you remove the broadcast, then do things work
>> properly?
> 
> As far as I can see. In all jobs where we we wait on vm condition we
> check misc state variables after that so if state is not changed than
> broadcasting will not help. (The only exception is migration and derivatives
> with qemu not supporting events but in this case we use sleeps and
> do not wait).
> 

To be clear, you are referencing virDomainObjWait[Until] callers that
aren't found within qemu_migration.c.

The two qemu_hotplug.c examples are waiting for QEMU events related to
tray eject or device removal, but are limited in their duration. If they
don't get the event in the specified time they have their means to
signify the timeout.

The two qemu_driver.c examples are possibly waiting forever. One waits
for an external event to signify a memory dump is complete via
qemuProcessHandleDumpCompleted or the job aborted. The other waits for
the blockjob to be completed when qemuBlockJobEventProcess clear the
flag. Both should also theoretically fail when the domain or qemu dies;
however, since both use virDomainObjWait which when properly tickled by
broadcast will call virDomainObjIsActive to compare vm->def->id != -1.

So the contention (for both I think) is that because the = -1 is not
done when QEMU is killed we're stuck. So rather than wait on something
that won't happen - use the EOF event as a way to force exit once a
broadcast happens via a qemu_domain specific qemuDomainObjWait.

Is that a "fair summary"?

>>
>> Since a block job would set @priv->blockjob when a block job starts and
>> is cleared during qemuBlockJobEventProcess processing when status is
>> VIR_DOMAIN_BLOCK_JOB_COMPLETED, VIR_DOMAIN_BLOCK_JOB_FAILED, or
>> VIR_DOMAIN_BLOCK_JOB_CANCELED.
>>
>> What about setting a @priv->blockjobAbort when the abort starts. Then
>> perhaps processMonitorEOFEvent or qemuProcessHandleMonitorEOF can handle
>> that properly so that we don't deadlock.
> 
> But how we can handle it the other way? I see no other option now besides
> setting some state variable and signalling after that to help non async
> job to finish and then let EOF handler proceed. (However check suggestions 
> after --- )
> 

OK, so perhaps that's just a rename of your @monEOF, but specific to the
example, but based on what I typed above would seemingly not be enough,
so let's stick with monEOF...

> Nikolay
> 
>>
>> Perhaps or hopefully, Jirka or Peter will comment too with this bump.
>>
>> John
>>
>>>
>>> Second let's introduce flag that EOF is received and broadcast after that.
>>> Now non async jobs can check this flag in wait loop.
>>>
>>> Signed-off-by: Nikolay Shirokovskiy 
>>>
>>> ---
>>>
>>> Diff from v1:


Just making sure - this RFC v2 comes from a series in Apr/May of this year:

https://www.redhat.com/archives/libvir-list/2018-April/msg01752.html

w/ review dialog spilling into May:
https://www.redhat.com/archives/libvir-list/2018-May/msg00126.html

based on the series :

https://www.redhat.com/archives/libvir-list/2018-April/msg01713.html

that you SNACK'd.

An awful long time to remember context!  When you reference earlier
patches, please try to remember to place a li

Re: [libvirt] [PATCH v4 2/5] libxl: add support for PVH

2018-10-17 Thread Marek Marczykowski-Górecki
On Sat, Oct 13, 2018 at 08:46:19AM -0600, Jim Fehlig wrote:
> I had some couch time at the start of the weekend and was finally able to
> try using this series with virt-install. As it turns out, reporting
> duplicate  blocks for  'xen' is not quite right. Instead we
> will want to report the additional  under the existing 'xen'
>  blocks. 

Is that virt-install limitation? In that case, IMO virt-install should
be fixed, instead of changing capabilities xml to match its limitations.

-- 
Best Regards,
Marek Marczykowski-Górecki
Invisible Things Lab
A: Because it messes up the order in which people normally read text.
Q: Why is top-posting such a bad thing?


signature.asc
Description: PGP signature
--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list

Re: [libvirt] [PATCH v4 2/5] libxl: add support for PVH

2018-10-17 Thread Jim Fehlig

On 10/16/18 7:23 PM, Marek Marczykowski-Górecki wrote:

On Sat, Oct 13, 2018 at 08:46:19AM -0600, Jim Fehlig wrote:

I had some couch time at the start of the weekend and was finally able to
try using this series with virt-install. As it turns out, reporting
duplicate  blocks for  'xen' is not quite right. Instead we
will want to report the additional  under the existing 'xen'
 blocks. E.g. instead of adding a duplicate

   
 xen
 
   64
   /usr/lib/xen/bin/qemu-system-i386
   xenpvh
   
 
 ...
   

we'll want to include the xenpvh machine in the existing  config for xen

   
 xen
 
   64
   /usr/lib/xen/bin/qemu-system-i386
   xenpvh
   xenpv
   
 
   

With this change to the capabilities reporting, virt-install works without
modification using

virt-install --paravirt --machine xenpv ...

I didn't think too hard about the best way to handle this, but the attached
diff is a POC hack that works with unmodified virt-install.


I can get it reported this way, but it will be wrong, because 
will be incorrectly reported. For example hap should be reported for
xenpvh, but not for xenpv, so bundling them together makes it
impossible. Similar for acpi and probably others too.


Yes, you are correct :-(. Modeling PVH has been more of a PITA than I expected.



If this really needs to be reported together, I'd go with reporting
superset of features, so os_type xen entry will have all features of PV
and PVH.


Reporting features that cannot possibly be supported doesn't see right. Let's 
summarize what we've learned thus far and see if that helps with modeling PVH.


PVH is a new xen machine type that is a hybrid of PV and HVM. Like HVM, PVH 
requires hardware virtualization support. Like PV, PVH requires a modified guest 
kernel, and one that is modified differently than PV (e.g. CONFIG_XEN_PVH vs
CONFIG_XEN_PV in the linux kernel). PV and PVH have different feature sets. E.g. 
PVH supports HAP, ACPI, etc., but PV does not. (Perhaps another useful data 
point: the long term goal of the xen community is to remove CONFIG_XEN_PV, 
essentially making PVH the new PV from xen perspective, but that is obviously a 
long ways out.)


Currently in libvirt, PV is modeled as VIR_DOMAIN_OSTYPE_XEN and machine 
"xenpv". HVM is modeled as VIR_DOMAIN_OSTYPE_HVM and machine "xenfv". For PVH 
we've considered VIR_DOMAIN_OSTYPE_XENPVH with machine "xenpvh", or simply 
adding PVH as machine "xenpvh" under existing VIR_DOMAIN_OSTYPE_XEN.


I've pushed for adding a new machine "xenpvh" under existing 
VIR_DOMAIN_OSTYPE_XEN with primary reason that both are OS types modified to run 
on xen. Also existing tools like virt-{install,manager} know how to handle 
OSTYPE_{HVM,XEN} and machines, allowing them to support PVH without (or with 
minimal) modification.


Have I been narrow-minded with my "both are OS types modified to run on xen" 
reasoning for using VIR_DOMAIN_OSTYPE_XEN? Should we really consider PVH a new 
OSTYPE? Your reminder that PV and PVH have a different feature set hints to 
modeling PVH as VIR_DOMAIN_OSTYPE_XENPVH. It is unfortunate that tools above 
libvirt would have to be taught about this new OSTYPE, but that shouldn't 
prevent using VIR_DOMAIN_OSTYPE_XENPVH if in fact it is the best way to model PVH.


Sadly I haven't strongly convinced myself one way or the other. I still like the 
idea of reusing VIR_DOMAIN_OSTYPE_XEN and adding machine "xenpvh" since it seems 
easier from an app perspective, but maybe I just need slapped and admit that PVH 
is a new OSTYPE. (I'm sure you'd like to do the slapping at this point...)


Regards,
Jim

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list

[libvirt] [PATCH] qemu: Fix IOThread pids lost after qemuProcessReconnect

2018-10-17 Thread Jie Wang
>From fca1732c0e1f691fb25c614349d5486bbc73a109 Mon Sep 17 00:00:00 2001
From: Jie Wang 
Date: Wed, 17 Oct 2018 22:55:51 +0800
Subject: [PATCH] qemu: Fix IOThread pids lost after qemuProcessReconnect

IOThread pids info will lost after libvirtd restart, then
if we call pinIOThread, sched_setaffinity will be called with
pid 0, not IOThread pid. So pinIOThread cannot work normally.

Signed-off-by: Jie Wang 
---
 src/qemu/qemu_process.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
index 40d35cbe6b..5bf55721d2 100644
--- a/src/qemu/qemu_process.c
+++ b/src/qemu/qemu_process.c
@@ -7650,6 +7650,9 @@ qemuProcessReconnect(void *opaque)
 
 qemuDomainVcpuPersistOrder(obj->def);
 
+if (qemuProcessDetectIOThreadPIDs(driver, obj, QEMU_ASYNC_JOB_NONE) < 0)
+goto error;
+
 if (qemuSecurityReserveLabel(driver->securityManager, obj->def, obj->pid) 
< 0)
 goto error;
 
-- 
2.16.2.windows.1




--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


[libvirt] [PATCH] network: add prefix to dhcp range of dnsmasq conf file for IPv4 too

2018-10-17 Thread Laine Stump
dnsmasq documentation says that the *IPv4* prefix/network
address/broadcast address sent to dhcp clients will be automatically
determined by dnsmasq by looking at the interface it's listening on,
so the original libvirt code that added dhcp support to virtual
networks did not add a prefix to the dnsmasq commandline (or later,
the dnsmasq conf file).

For *IPv6* however, dnsmasq cannot automatically determine the prefix,
so it must be explicitly provided in the conf file (as a part of the
dhcp-range option). Years after the initial IPv4 support, when IPv6
dhcp support was added, libvirt added the prefix to dhcp-range, but
only for IPv6 (following the "if it ain't broke, don't fix it"
doctrine).

Recently a user reported (privately, because they suspected a possible
security implication, which turned out to be unfounded) a bug on a
host where one of the interfaces was a superset of the libvirt network
where dhcp is needed (e.g., the host's ethernet is 10.0.0.20/8, and
the libvirt network is 10.10.0.1/24). For some reason dnsmasq was
supplying the netmask/broadcast address for the /8 network to clients
requesting an address on the /24 interface.

This seems like a bug in dnsmasq, but even if/when it gets fixed
there, it looks like there is no harm in just adding the prefix to all
dhcp-range options regardless of IPv4 vs IPv6, so that's what this
patch does.

Signed-off-by: Laine Stump 
---
 src/network/bridge_driver.c   | 7 ++-
 tests/networkxml2confdata/dhcp6-nat-network.conf  | 2 +-
 tests/networkxml2confdata/isolated-network.conf   | 2 +-
 tests/networkxml2confdata/nat-network-dns-srv-record-minimal.conf | 2 +-
 tests/networkxml2confdata/nat-network-dns-srv-record.conf | 2 +-
 tests/networkxml2confdata/nat-network-dns-txt-record.conf | 2 +-
 tests/networkxml2confdata/nat-network-name-with-quotes.conf   | 2 +-
 tests/networkxml2confdata/nat-network.conf| 2 +-
 tests/networkxml2confdata/netboot-network.conf| 2 +-
 tests/networkxml2confdata/netboot-proxy-network.conf  | 2 +-
 tests/networkxml2confdata/ptr-domains-auto.conf   | 2 +-
 11 files changed, 12 insertions(+), 15 deletions(-)

diff --git a/src/network/bridge_driver.c b/src/network/bridge_driver.c
index 4bbc4f5a6d..7f5ff79fdc 100644
--- a/src/network/bridge_driver.c
+++ b/src/network/bridge_driver.c
@@ -1416,11 +1416,8 @@ networkDnsmasqConfContents(virNetworkObjPtr obj,
 !(eaddr = virSocketAddrFormat(&ipdef->ranges[r].end)))
 goto cleanup;
 
-virBufferAsprintf(&configbuf, "dhcp-range=%s,%s",
-  saddr, eaddr);
-if (VIR_SOCKET_ADDR_IS_FAMILY(&ipdef->address, AF_INET6))
-virBufferAsprintf(&configbuf, ",%d", prefix);
-virBufferAddLit(&configbuf, "\n");
+virBufferAsprintf(&configbuf, "dhcp-range=%s,%s,%d\n",
+  saddr, eaddr, prefix);
 
 VIR_FREE(saddr);
 VIR_FREE(eaddr);
diff --git a/tests/networkxml2confdata/dhcp6-nat-network.conf 
b/tests/networkxml2confdata/dhcp6-nat-network.conf
index d1058df3b6..e1e110fe23 100644
--- a/tests/networkxml2confdata/dhcp6-nat-network.conf
+++ b/tests/networkxml2confdata/dhcp6-nat-network.conf
@@ -8,7 +8,7 @@ strict-order
 except-interface=lo
 bind-dynamic
 interface=virbr0
-dhcp-range=192.168.122.2,192.168.122.254
+dhcp-range=192.168.122.2,192.168.122.254,24
 dhcp-no-override
 dhcp-authoritative
 dhcp-range=2001:db8:ac10:fd01::1:10,2001:db8:ac10:fd01::1:ff,64
diff --git a/tests/networkxml2confdata/isolated-network.conf 
b/tests/networkxml2confdata/isolated-network.conf
index ce4a59f6c1..d182f42f0a 100644
--- a/tests/networkxml2confdata/isolated-network.conf
+++ b/tests/networkxml2confdata/isolated-network.conf
@@ -10,7 +10,7 @@ bind-interfaces
 listen-address=192.168.152.1
 dhcp-option=3
 no-resolv
-dhcp-range=192.168.152.2,192.168.152.254
+dhcp-range=192.168.152.2,192.168.152.254,24
 dhcp-no-override
 dhcp-authoritative
 dhcp-lease-max=253
diff --git a/tests/networkxml2confdata/nat-network-dns-srv-record-minimal.conf 
b/tests/networkxml2confdata/nat-network-dns-srv-record-minimal.conf
index f35ea1d5d4..678e4a4bfd 100644
--- a/tests/networkxml2confdata/nat-network-dns-srv-record-minimal.conf
+++ b/tests/networkxml2confdata/nat-network-dns-srv-record-minimal.conf
@@ -13,7 +13,7 @@ listen-address=fc00:db8:ac10:fe01::1
 listen-address=fc00:db8:ac10:fd01::1
 listen-address=10.24.10.1
 srv-host=_name._tcp
-dhcp-range=192.168.122.2,192.168.122.254
+dhcp-range=192.168.122.2,192.168.122.254,24
 dhcp-no-override
 dhcp-authoritative
 dhcp-lease-max=253
diff --git a/tests/networkxml2confdata/nat-network-dns-srv-record.conf 
b/tests/networkxml2confdata/nat-network-dns-srv-record.conf
index af1ed70758..4f21eb18b3 100644
--- a/tests/networkxml2confdata/nat-network-dns-srv-record.conf
+++ b/tests/networkx

Re: [libvirt] [PATCH v6 09/13] conf: Allocate/release 'uid' and 'fid' in PCI address

2018-10-17 Thread Andrea Bolognani
On Tue, 2018-10-16 at 11:28 +0800, Yi Min Zhao wrote:
> 在 2018/10/11 下午10:50, Andrea Bolognani 写道:
> > On Fri, 2018-09-28 at 16:46 +0800, Yi Min Zhao wrote:
> > >   # conf/device_conf.h
> > > +virDeviceInfoPCIAddressExtensionIsPresent;
> > > +virDeviceInfoPCIAddressExtensionIsWanted;
> > >   virDeviceInfoPCIAddressIsPresent;
> > >   virDeviceInfoPCIAddressIsWanted;
> > >   virDomainDeviceAddressIsValid;
> > > @@ -119,6 +121,9 @@ virDomainCCWAddressSetFree;
> > >   virDomainPCIAddressBusIsFullyReserved;
> > >   virDomainPCIAddressBusSetModel;
> > >   virDomainPCIAddressEnsureAddr;
> > > +virDomainPCIAddressExtensionReleaseAddr;
> > > +virDomainPCIAddressExtensionReserveAddr;
> > > +virDomainPCIAddressExtensionReserveNextAddr;
> > 
> > I'm not quite quire we need to export these functions.
> > 
> > With the recent changes, we've gotten to the point that we're not
> > even passing a virZPCIDeviceAddress to them, but rather they have
> > the very same signature as the regular virPCIDeviceAddress...
> > 
> > So it should be possible to just make them static and only call
> > them from the virDomainPCIAddressReserveAddr() and friends, right?
> > Which is where I was hoping we could eventually get. Or did I
> > miss something?
> 
> I think this would make things complex. If either PCI address or
> zPCI address exists, we have to do more checks for calling
> virDomainPCIAddressReserveAddr(). And there are amounts of
> code calling ***IsWanted() to call ***ReserveNext***(). I think
> keeping them separately is better.

Again, I might be missing something because I haven't actually tried
implementing any of this, but at least from the theoretical point of
view I don't see how keeping them separate would make things simpler:
if anything, it seems to me like it would make them more complicated
for the calling code because now you have to worry about the PCI
address extensions *in addition* to the PCI address itself.

-- 
Andrea Bolognani / Red Hat / Virtualization

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list

[libvirt] [PATCH 2/2] Revert "qemu: Forbid pinning vCPUs for TCG domain"

2018-10-17 Thread Daniel P . Berrangé
This reverts commit 8b035c84d8a7362a87a95e6114b8e7f959685ed9.

The MTTCG impl in QEMU does allow pinning vCPUs.

When the guest is running we already check if pinning is
possible in the qemuDomainPinVcpuLive method, so this
check was adding no benefit.

When the guest is not running, we cannot know whether the
subsequent launch will use MTTCG or TCG, so we must allow
the pinning request. If the guest does use TCG on the next
launch it will fail, but this is no worse than if the user
had done a virDomainDefineXML with an XML doc specifying
vCPU pinning.

Signed-off-by: Daniel P. Berrangé 
---
 src/qemu/qemu_driver.c | 7 ---
 1 file changed, 7 deletions(-)

diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index a52e2495d5..981db01059 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -5111,13 +5111,6 @@ qemuDomainPinVcpuFlags(virDomainPtr dom,
 if (virDomainObjGetDefs(vm, flags, &def, &persistentDef) < 0)
 goto endjob;
 
-if ((def && def->virtType == VIR_DOMAIN_VIRT_QEMU) ||
-(persistentDef && persistentDef->virtType == VIR_DOMAIN_VIRT_QEMU)) {
-virReportError(VIR_ERR_OPERATION_FAILED, "%s",
-   _("Virt type 'qemu' does not support vCPU pinning"));
-goto endjob;
-}
-
 if (persistentDef &&
 !(vcpuinfo = virDomainDefGetVcpu(persistentDef, vcpu))) {
 virReportError(VIR_ERR_INVALID_ARG,
-- 
2.17.2

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list

[libvirt] [PATCH 0/2] Fix compatibility with MTTCG

2018-10-17 Thread Daniel P . Berrangé
QEMU is using MTTCG by default on an increasingly large
set of host/guest combinations. This allows us to use the
normal vCPU pinning support we already have for KVM. We
just need to stop throwing away the PID info, and stop
artificially blocking pinning APIs.

Daniel P. Berrangé (2):
  qemu: fix recording of vCPU pids for MTTCG
  Revert "qemu: Forbid pinning vCPUs for TCG domain"

 src/qemu/qemu_domain.c | 81 ++
 src/qemu/qemu_driver.c |  7 
 2 files changed, 51 insertions(+), 37 deletions(-)

-- 
2.17.2

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list

[libvirt] [PATCH 1/2] qemu: fix recording of vCPU pids for MTTCG

2018-10-17 Thread Daniel P . Berrangé
MTTCG is the new multi-threaded impl of TCG which follows
KVM in having one host OS thread per vCPU. Historically
we have discarded all PIDs reported for TCG guests, but
we must now selectively honour this data.

We don't have anything in the domain XML that indicates
whether a guest is using TCG or MTTCG. While QEMU does
have an option (-accel tcg,thread=single|multi), it is
not desirable to expose this in libvirt. QEMU will
automatically use MTTCG when the host/guest architecture
pairing is known to be safe. Only developers of QEMU TCG
have a strong reason to override this logic.

Thus we use two sanity checks to decide if the vCPU
PID information is usable. First we see if the PID
duplicates the main emulator PID, and second we see
if the PID duplicates any other vCPUs.

Signed-off-by: Daniel P. Berrangé 
---
 src/qemu/qemu_domain.c | 81 ++
 1 file changed, 51 insertions(+), 30 deletions(-)

diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
index f00f1b3fdb..c7a0c03e3f 100644
--- a/src/qemu/qemu_domain.c
+++ b/src/qemu/qemu_domain.c
@@ -10326,9 +10326,10 @@ qemuDomainRefreshVcpuInfo(virQEMUDriverPtr driver,
 qemuDomainVcpuPrivatePtr vcpupriv;
 qemuMonitorCPUInfoPtr info = NULL;
 size_t maxvcpus = virDomainDefGetVcpusMax(vm->def);
-size_t i;
+size_t i, j;
 bool hotplug;
 bool fast;
+bool validTIDs = true;
 int rc;
 int ret = -1;
 
@@ -10336,6 +10337,8 @@ qemuDomainRefreshVcpuInfo(virQEMUDriverPtr driver,
 fast = virQEMUCapsGet(QEMU_DOMAIN_PRIVATE(vm)->qemuCaps,
   QEMU_CAPS_QUERY_CPUS_FAST);
 
+VIR_DEBUG("Maxvcpus %zu hotplug %d fast query %d", maxvcpus, hotplug, 
fast);
+
 if (qemuDomainObjEnterMonitorAsync(driver, vm, asyncJob) < 0)
 return -1;
 
@@ -10348,39 +10351,57 @@ qemuDomainRefreshVcpuInfo(virQEMUDriverPtr driver,
 if (rc < 0)
 goto cleanup;
 
+/*
+ * The query-cpus[-fast] commands return information
+ * about the vCPUs, including the OS level PID that
+ * is executing the vCPU.
+ *
+ * For KVM there is always a 1-1 mapping between
+ * vCPUs and host OS PIDs.
+ *
+ * For TCG things are a little more complicated.
+ *
+ *  - In some cases the vCPUs will all have the same
+ *PID as the main emulator thread.
+ *  - In some cases the first vCPU will have a distinct
+ *PID, but other vCPUs will share the emulator thread
+ *
+ * For MTTCG, things work the same as KVM, with each
+ * vCPU getting its own PID.
+ *
+ * We use the Host OS PIDs for doing vCPU pinning
+ * and reporting. The TCG data reporting will result
+ * in bad behaviour such as pinning the wrong PID.
+ * We must thus detect and discard bogus PID info
+ * from TCG, while still honouring the modern MTTCG
+ * impl which we can support.
+ */
+for (i = 0; i < maxvcpus && validTIDs; i++) {
+if (info[i].tid == vm->pid) {
+VIR_DEBUG("vCPU[%zu] PID %llu duplicates process",
+  i, (unsigned long long)info[i].tid);
+validTIDs = false;
+}
+
+for (j = 0; j < i; j++) {
+if (info[i].tid == info[j].tid) {
+VIR_DEBUG("vCPU[%zu] PID %llu duplicates vCPU[%zu]",
+  i, (unsigned long long)info[i].tid, j);
+validTIDs = false;
+}
+}
+
+if (validTIDs)
+VIR_DEBUG("vCPU[%zu] PID %llu is valid",
+  i, (unsigned long long)info[i].tid);
+}
+
+VIR_DEBUG("Extracting vCPU information validTIDs=%d", validTIDs);
 for (i = 0; i < maxvcpus; i++) {
 vcpu = virDomainDefGetVcpu(vm->def, i);
 vcpupriv = QEMU_DOMAIN_VCPU_PRIVATE(vcpu);
 
-/*
- * Current QEMU *can* report info about host threads mapped
- * to vCPUs, but it is not in a manner we can correctly
- * deal with. The TCG CPU emulation does have a separate vCPU
- * thread, but it runs every vCPU in that same thread. So it
- * is impossible to setup different affinity per thread.
- *
- * What's more the 'query-cpus[-fast]' command returns bizarre
- * data for the threads. It gives the TCG thread for the
- * vCPU 0, but for vCPUs 1-> N, it actually replies with
- * the main process thread ID.
- *
- * The result is that when we try to set affinity for
- * vCPU 1, it will actually change the affinity of the
- * emulator thread :-( When you try to set affinity for
- * vCPUs 2, 3 it will fail if the affinity was
- * different from vCPU 1.
- *
- * We *could* allow vcpu pinning with TCG, if we made the
- * restriction that all vCPUs had the same mask. This would
- * at least let us separate emulator from vCPUs threads, as
- * we do for KVM. It would need some changes to our cg

Re: [libvirt] [PATCH v3 4/6] chardev: add support for authorization for TLS clients

2018-10-17 Thread Juan Quintela
Daniel P. Berrangé  wrote:
> From: "Daniel P. Berrange" 
>
> Currently any client which can complete the TLS handshake is able to use
> a chardev server. The server admin can turn on the 'verify-peer' option
> for the x509 creds to require the client to provide a x509
> certificate. This means the client will have to acquire a certificate
> from the CA before they are permitted to use the chardev server. This is
> still a fairly low bar.
>
> This adds a 'tls-authz=OBJECT-ID' option to the socket chardev backend
> which takes the ID of a previously added 'QAuthZ' object instance. This
> will be used to validate the client's x509 distinguished name. Clients
> failing the check will not be permitted to use the chardev server.
>
> For example to setup authorization that only allows connection from a
> client whose x509 certificate distinguished name contains 'CN=fred', you
> would use:
>
>   $QEMU -object tls-creds-x509,id=tls0,dir=/home/berrange/qemutls,\
> endpoint=server,verify-peer=yes \
> -object authz-simple,id=authz0,identity=CN=laptop.example.com,,\
> O=Example Org,,L=London,,ST=London,,C=GB \
> -chardev socket,host=127.0.0.1,port=9000,server,\
>tls-creds=tls0,tls-authz=authz0 \
> ...other qemu args...
>
> Signed-off-by: Daniel P. Berrange 

Reviewed-by: Juan Quintela 

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list

Re: [libvirt] [PATCH v3 2/6] nbd: allow authorization with nbd-server-start QMP command

2018-10-17 Thread Juan Quintela
Daniel P. Berrangé  wrote:
> From: "Daniel P. Berrange" 
>
> As with the previous patch to qemu-nbd, the nbd-server-start QMP command
> also needs to be able to specify authorization when enabling TLS encryption.
>
> First the client must create a QAuthZ object instance using the
> 'object-add' command:
>
>{
>  'execute': 'object-add',
>  'arguments': {
>'qom-type': 'authz-list',
>'id': 'authz0',
>'parameters': {
>  'policy': 'deny',
>  'rules': [
>{
>  'match': '*CN=fred',
>  'policy': 'allow'
>}
>  ]
>}
>  }
>}
>
> They can then reference this in the new 'tls-authz' parameter when
> executing the 'nbd-server-start' command:
>
>{
>  'execute': 'nbd-server-start',
>  'arguments': {
>'addr': {
>'type': 'inet',
>'host': '127.0.0.1',
>'port': '9000'
>},
>'tls-creds': 'tls0',
>'tls-authz': 'authz0'
>  }
>}
>
> Signed-off-by: Daniel P. Berrange 

Reviewed-by: Juan Quintela 

similar to previous patch in series.

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list

Re: [libvirt] [PATCH v3 3/6] migration: add support for a "tls-authz" migration parameter

2018-10-17 Thread Juan Quintela
Daniel P. Berrangé  wrote:
> From: "Daniel P. Berrange" 
>
> The QEMU instance that runs as the server for the migration data
> transport (ie the target QEMU) needs to be able to configure access
> control so it can prevent unauthorized clients initiating an incoming
> migration. This adds a new 'tls-authz' migration parameter that is used
> to provide the QOM ID of a QAuthZ subclass instance that provides the
> access control check. This is checked against the x509 certificate
> obtained during the TLS handshake.
>
> For example, when starting a QEMU for incoming migration, it is
> possible to give an example identity of the source QEMU that is
> intended to be connecting later:
>
>   $QEMU \
>  -monitor stdio \
>  -incoming defer \
>  ...other args...
>
>   (qemu) object_add tls-creds-x509,id=tls0,dir=/home/berrange/qemutls,\
>  endpoint=server,verify-peer=yes \
>   (qemu) object_add authz-simple,id=auth0,identity=CN=laptop.example.com,,\
>  O=Example Org,,L=London,,ST=London,,C=GB \
>   (qemu) migrate_incoming tcp:localhost:9000
>
> Signed-off-by: Daniel P. Berrange 

Reviewed-by: Juan Quintela 

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list

Re: [libvirt] [PATCH v3 1/6] qemu-nbd: add support for authorization of TLS clients

2018-10-17 Thread Juan Quintela
Daniel P. Berrangé  wrote:
> From: "Daniel P. Berrange" 
>
> Currently any client which can complete the TLS handshake is able to use
> the NBD server. The server admin can turn on the 'verify-peer' option
> for the x509 creds to require the client to provide a x509 certificate.
> This means the client will have to acquire a certificate from the CA
> before they are permitted to use the NBD server. This is still a fairly
> low bar to cross.
>
> This adds a '--tls-authz OBJECT-ID' option to the qemu-nbd command which
> takes the ID of a previously added 'QAuthZ' object instance. This will
> be used to validate the client's x509 distinguished name. Clients
> failing the authorization check will not be permitted to use the NBD
> server.
>
> For example to setup authorization that only allows connection from a client
> whose x509 certificate distinguished name is
>
>CN=laptop.example.com,O=Example Org,L=London,ST=London,C=GB
>
> use:
>
>   qemu-nbd --object tls-creds-x509,id=tls0,dir=/home/berrange/qemutls,\
> endpoint=server,verify-peer=yes \
>--object authz-simple,id=auth0,identity=CN=laptop.example.com,,\
> O=Example Org,,L=London,,ST=London,,C=GB \
>--tls-creds tls0 \
>--tls-authz authz0
>  other qemu-nbd args...
>
> Signed-off-by: Daniel P. Berrange 

Reviewed-by: Juan Quintela 

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list

Re: [libvirt] [PATCH v3 04/13] security_manager: Rework metadata locking

2018-10-17 Thread Michal Privoznik
On 10/17/2018 01:46 PM, Daniel P. Berrangé wrote:
> On Wed, Oct 17, 2018 at 12:52:50PM +0200, Michal Privoznik wrote:
>> On 10/17/2018 11:45 AM, Daniel P. Berrangé wrote:
>>> On Wed, Oct 17, 2018 at 11:06:46AM +0200, Michal Privoznik wrote:
 Trying to use virlockd to lock metadata turns out to be too big
 gun. Since we will always spawn a separate process for relabeling
 we are safe to use thread unsafe POSIX locks and take out
 virtlockd completely out of the picture.
>>>
>>> Note safety of POSIX locks is not merely about other threads.
>>>
>>> If *any* code we invoke from security_dac or security_selinux
>>> were to open() & close() the file we're labelling we'll
>>> loose the locks.
>>>
>>> This is a little concerning since we call out to a few functions
>>> in src/util that might not be aware they should *never* open)(
>>> and close() the path they're given. It looks like we lucky at
>>> the moment, but this has future gun -> foot -> ouch potential.
>>>
>>> Likewise we also call out to libselinux APIs. I've looked at
>>> the code for the APIs we call and again, right now, we look
>>> to be safe, but there's some risk there.
>>>
>>> I'm not sure whether this is big enough risk to invalidate this
>>> forking approach or not though.  This kind of problem was the
>>> reason why virtlockd was created to hold locks in a completely
>>> separate process from the code with the critical section.
>>
>> But unless we check for hard links virtlockd will suffer from the POSIX
>> locks too. For instance, assume that B is a hardlink to A. Then, if a
>> process opens A, locks it an the other opens B and closes it the lock is
>> released. Even though A and B look distinct.
> 
> Yes, that is correct - there's an implicit assumption that all VMs
> are using the same path if there's multiple hard links. Foruntately
> this scenario is fairly rare - more common is symlinks which can be
> canonicalized.
> 
>> The problem with virlockd is that our current virlockspace impl is not
>> aware of different offsets. We can try to make it so, but that would
>> complicate the code a lot. Just imagine that there's a domain already
>> running and it holds disk content lock over file C. If an user wants to
>> start a domain (which too has disk C), libvirtd will try to lock C again
>> (even though on different offset) but it will fail to do so because C is
>> already locked.
> 
> Yep, we would need intelligence around offsets to avoid opening it
> twice for each offset.
> 
>> Another problem with virtlockd that I faced was that with current
>> implementation the connection to virlockd is opened in metadataLock(),
>> where the connection FD is dup()-ed to prevent connection closing. The
>> connection is then closed in metadataUnlock() where the dup()-ed FD is
>> closed. This works pretty much good, except for fork(). If a fork()
>> occurs between metadataLock() and metadataUnlock() the duplicated FD is
>> leaked into the child and we can't be certain when will the child close
>> it. The worst case scenario is that the child will close the FD when we
>> are holding some resources, which results in virtlockd killing libvirtd
>> (= registered owner of the locks).
> 
> I don't think that's the case actually. If you have an FD that is a
> UNIX/TCP socket that gets dup(), the remote end of the socket won't
> see HUP until all duplicates of the socket are closed. IOW, both the
> parent & child process would need to close their respective dups.

It is the case. I've seen it out in the wild (when starting several
domains at once):

https://www.redhat.com/archives/libvir-list/2018-September/msg01138.html

> 
>> Alternatively, we can switch to OFD locks and have the feature work only
>> on Linux. If anybody from BSD users will want the feature they will have
>> to push their kernel developers to implement some sensible file locking.
>>
>> Also, one of the questions is what we want metadata locking to be. So
>> far I am aiming on having exclusive access to the file that we are
>> chown()-ing (or setfcon()-ing) so that I can put a code around it that
>> manipulates XATTR where the original owner would be stored. This way the
>> metadata lock is held only for a fraction of a second.
>> I guess we don't want the metadata lock be held through whole run of a
>> domain, i.e. obtained at domain start and released at domain shutdown. I
>> don't see much benefit in that.
> 
> Yes, we must only lock it for the short time that we're reading or writing
> the metadata. We must not hold the long long term, because other threads
> or processes need access to the metadata while the VM is running, for
> example to deal with validating shared disk labels.
> 
> 
> On further reflection, I think we'll be safe enough with what you're
> proposing. For it to go wrong, we would have to have a bug in the code
> which accidentally open+close the socket, and be on an old kernel that
> lacks OFD locks, and two processes must be in the critical section at
> the sa

Re: [libvirt] [PATCH] util: Fix a typo in comments of virresctrl.c

2018-10-17 Thread Andrea Bolognani
On Wed, 2018-10-17 at 16:27 +0800, Wang Huaqiang wrote:
> Signed-off-by: Wang Huaqiang 
> ---
>  src/util/virresctrl.c | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)

Reviewed-by: Andrea Bolognani 

and pushed.

-- 
Andrea Bolognani / Red Hat / Virtualization

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH v3 04/13] security_manager: Rework metadata locking

2018-10-17 Thread Daniel P . Berrangé
On Wed, Oct 17, 2018 at 12:52:50PM +0200, Michal Privoznik wrote:
> On 10/17/2018 11:45 AM, Daniel P. Berrangé wrote:
> > On Wed, Oct 17, 2018 at 11:06:46AM +0200, Michal Privoznik wrote:
> >> Trying to use virlockd to lock metadata turns out to be too big
> >> gun. Since we will always spawn a separate process for relabeling
> >> we are safe to use thread unsafe POSIX locks and take out
> >> virtlockd completely out of the picture.
> > 
> > Note safety of POSIX locks is not merely about other threads.
> > 
> > If *any* code we invoke from security_dac or security_selinux
> > were to open() & close() the file we're labelling we'll
> > loose the locks.
> > 
> > This is a little concerning since we call out to a few functions
> > in src/util that might not be aware they should *never* open)(
> > and close() the path they're given. It looks like we lucky at
> > the moment, but this has future gun -> foot -> ouch potential.
> > 
> > Likewise we also call out to libselinux APIs. I've looked at
> > the code for the APIs we call and again, right now, we look
> > to be safe, but there's some risk there.
> > 
> > I'm not sure whether this is big enough risk to invalidate this
> > forking approach or not though.  This kind of problem was the
> > reason why virtlockd was created to hold locks in a completely
> > separate process from the code with the critical section.
> 
> But unless we check for hard links virtlockd will suffer from the POSIX
> locks too. For instance, assume that B is a hardlink to A. Then, if a
> process opens A, locks it an the other opens B and closes it the lock is
> released. Even though A and B look distinct.

Yes, that is correct - there's an implicit assumption that all VMs
are using the same path if there's multiple hard links. Foruntately
this scenario is fairly rare - more common is symlinks which can be
canonicalized.

> The problem with virlockd is that our current virlockspace impl is not
> aware of different offsets. We can try to make it so, but that would
> complicate the code a lot. Just imagine that there's a domain already
> running and it holds disk content lock over file C. If an user wants to
> start a domain (which too has disk C), libvirtd will try to lock C again
> (even though on different offset) but it will fail to do so because C is
> already locked.

Yep, we would need intelligence around offsets to avoid opening it
twice for each offset.

> Another problem with virtlockd that I faced was that with current
> implementation the connection to virlockd is opened in metadataLock(),
> where the connection FD is dup()-ed to prevent connection closing. The
> connection is then closed in metadataUnlock() where the dup()-ed FD is
> closed. This works pretty much good, except for fork(). If a fork()
> occurs between metadataLock() and metadataUnlock() the duplicated FD is
> leaked into the child and we can't be certain when will the child close
> it. The worst case scenario is that the child will close the FD when we
> are holding some resources, which results in virtlockd killing libvirtd
> (= registered owner of the locks).

I don't think that's the case actually. If you have an FD that is a
UNIX/TCP socket that gets dup(), the remote end of the socket won't
see HUP until all duplicates of the socket are closed. IOW, both the
parent & child process would need to close their respective dups.

> Alternatively, we can switch to OFD locks and have the feature work only
> on Linux. If anybody from BSD users will want the feature they will have
> to push their kernel developers to implement some sensible file locking.
> 
> Also, one of the questions is what we want metadata locking to be. So
> far I am aiming on having exclusive access to the file that we are
> chown()-ing (or setfcon()-ing) so that I can put a code around it that
> manipulates XATTR where the original owner would be stored. This way the
> metadata lock is held only for a fraction of a second.
> I guess we don't want the metadata lock be held through whole run of a
> domain, i.e. obtained at domain start and released at domain shutdown. I
> don't see much benefit in that.

Yes, we must only lock it for the short time that we're reading or writing
the metadata. We must not hold the long long term, because other threads
or processes need access to the metadata while the VM is running, for
example to deal with validating shared disk labels.


On further reflection, I think we'll be safe enough with what you're
proposing. For it to go wrong, we would have to have a bug in the code
which accidentally open+close the socket, and be on an old kernel that
lacks OFD locks, and two processes must be in the critical section at
the same time in that short < 1 second window.

Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|

Re: [libvirt] [PATCH v3 04/13] security_manager: Rework metadata locking

2018-10-17 Thread Michal Privoznik
On 10/17/2018 11:45 AM, Daniel P. Berrangé wrote:
> On Wed, Oct 17, 2018 at 11:06:46AM +0200, Michal Privoznik wrote:
>> Trying to use virlockd to lock metadata turns out to be too big
>> gun. Since we will always spawn a separate process for relabeling
>> we are safe to use thread unsafe POSIX locks and take out
>> virtlockd completely out of the picture.
> 
> Note safety of POSIX locks is not merely about other threads.
> 
> If *any* code we invoke from security_dac or security_selinux
> were to open() & close() the file we're labelling we'll
> loose the locks.
> 
> This is a little concerning since we call out to a few functions
> in src/util that might not be aware they should *never* open)(
> and close() the path they're given. It looks like we lucky at
> the moment, but this has future gun -> foot -> ouch potential.
> 
> Likewise we also call out to libselinux APIs. I've looked at
> the code for the APIs we call and again, right now, we look
> to be safe, but there's some risk there.
> 
> I'm not sure whether this is big enough risk to invalidate this
> forking approach or not though.  This kind of problem was the
> reason why virtlockd was created to hold locks in a completely
> separate process from the code with the critical section.

But unless we check for hard links virtlockd will suffer from the POSIX
locks too. For instance, assume that B is a hardlink to A. Then, if a
process opens A, locks it an the other opens B and closes it the lock is
released. Even though A and B look distinct.

The problem with virlockd is that our current virlockspace impl is not
aware of different offsets. We can try to make it so, but that would
complicate the code a lot. Just imagine that there's a domain already
running and it holds disk content lock over file C. If an user wants to
start a domain (which too has disk C), libvirtd will try to lock C again
(even though on different offset) but it will fail to do so because C is
already locked.

Another problem with virtlockd that I faced was that with current
implementation the connection to virlockd is opened in metadataLock(),
where the connection FD is dup()-ed to prevent connection closing. The
connection is then closed in metadataUnlock() where the dup()-ed FD is
closed. This works pretty much good, except for fork(). If a fork()
occurs between metadataLock() and metadataUnlock() the duplicated FD is
leaked into the child and we can't be certain when will the child close
it. The worst case scenario is that the child will close the FD when we
are holding some resources, which results in virtlockd killing libvirtd
(= registered owner of the locks).

Alternatively, we can switch to OFD locks and have the feature work only
on Linux. If anybody from BSD users will want the feature they will have
to push their kernel developers to implement some sensible file locking.

Also, one of the questions is what we want metadata locking to be. So
far I am aiming on having exclusive access to the file that we are
chown()-ing (or setfcon()-ing) so that I can put a code around it that
manipulates XATTR where the original owner would be stored. This way the
metadata lock is held only for a fraction of a second.
I guess we don't want the metadata lock be held through whole run of a
domain, i.e. obtained at domain start and released at domain shutdown. I
don't see much benefit in that.

Michal

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list

Re: [libvirt] [PATCH v3 04/13] security_manager: Rework metadata locking

2018-10-17 Thread Daniel P . Berrangé
On Wed, Oct 17, 2018 at 11:06:46AM +0200, Michal Privoznik wrote:
> Trying to use virlockd to lock metadata turns out to be too big
> gun. Since we will always spawn a separate process for relabeling
> we are safe to use thread unsafe POSIX locks and take out
> virtlockd completely out of the picture.

Note safety of POSIX locks is not merely about other threads.

If *any* code we invoke from security_dac or security_selinux
were to open() & close() the file we're labelling we'll
loose the locks.

This is a little concerning since we call out to a few functions
in src/util that might not be aware they should *never* open)(
and close() the path they're given. It looks like we lucky at
the moment, but this has future gun -> foot -> ouch potential.

Likewise we also call out to libselinux APIs. I've looked at
the code for the APIs we call and again, right now, we look
to be safe, but there's some risk there.

I'm not sure whether this is big enough risk to invalidate this
forking approach or not though.  This kind of problem was the
reason why virtlockd was created to hold locks in a completely
separate process from the code with the critical section.

Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH v3 01/13] virprocess: Introduce virProcessRunInFork

2018-10-17 Thread Daniel P . Berrangé
On Wed, Oct 17, 2018 at 11:06:43AM +0200, Michal Privoznik wrote:
> This new helper can be used to spawn a child process and run
> passed callback from it. This will come handy esp. if the
> callback is not thread safe.
> 
> Signed-off-by: Michal Privoznik 
> ---
>  src/libvirt_private.syms |  1 +
>  src/util/virprocess.c| 81 
>  src/util/virprocess.h| 16 
>  3 files changed, 98 insertions(+)

> +/**
> + * virProcessRunInFork:
> + * @cb: callback to run
> + * @opaque: opaque data to @cb
> + *
> + * Do the fork and run @cb in the child. This can be used when
> + * @cb does something thread unsafe, for instance. However, due
> + * to possible signal propagation @cb should only call async
> + * signal safe functions. On return, the returned value is either
> + * -1 with error message reported if something went bad in the
> + *  parent, if child has died from a signal or if the child
> + *  returned EXIT_CANCELED. Otherwise the returned value is the
> + *  exit status of the child.

Instead of;

  However, due to possible signal propagation @cb should only call
  async signal safe functions.

Use:

  All signals will be reset to have their platform default handlers
  and unmasked. @cb must only use async signal safe functions. In
  particular no mutexes should be used in @cb, unless steps were
  taken before forking to guarantee a predictable state. @cb must
  not exec any external binaries, instead virCommand/virExec should
  be used for that purpose.


> + */
> +int
> +virProcessRunInFork(virProcessForkCallback cb,
> +void *opaque)
> +{
> +int ret = -1;
> +pid_t child = -1;
> +pid_t parent = getpid();
> +int errfd[2] = { -1, -1 };
> +
> +if (pipe2(errfd, O_CLOEXEC) < 0) {
> +virReportSystemError(errno, "%s",
> + _("Cannot create pipe for child"));
> +return -1;
> +}
> +
> +if ((child = virFork()) < 0)
> +goto cleanup;
> +
> +if (child == 0) {
> +VIR_FORCE_CLOSE(errfd[0]);
> +ret = virProcessForkHelper(errfd[1], parent, cb, opaque);
> +VIR_FORCE_CLOSE(errfd[1]);
> +_exit(ret < 0 ? EXIT_CANCELED : ret);
> +} else {
> +int status;
> +VIR_AUTOFREE(char *) buf = NULL;
> +
> +VIR_FORCE_CLOSE(errfd[1]);
> +ignore_value(virFileReadHeaderFD(errfd[0], 1024, &buf));
> +ret = virProcessWait(child, &status, false);
> +if (!ret) {
> +ret = status == EXIT_CANCELED ? -1 : status;
> +if (ret) {
> +virReportError(VIR_ERR_INTERNAL_ERROR,
> +   _("child reported: %s"),
> +   NULLSTR(buf));

It would be desirable to include the exit status value in the message

> +}
> +}
> +}
> +
> + cleanup:
> +VIR_FORCE_CLOSE(errfd[0]);
> +VIR_FORCE_CLOSE(errfd[1]);
> +return ret;
> +}

Regards,
Daniel
-- 
|: https://berrange.com  -o-https://www.flickr.com/photos/dberrange :|
|: https://libvirt.org -o-https://fstop138.berrange.com :|
|: https://entangle-photo.org-o-https://www.instagram.com/dberrange :|

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


[libvirt] [PATCH v3 13/13] Revert "virlockspace: Allow caller to specify start and length offset in virLockSpaceAcquireResource"

2018-10-17 Thread Michal Privoznik
This reverts commit afd5a27575e8b6a494d2728552fe0e89c71e32b4.

Signed-off-by: Michal Privoznik 
---
 src/locking/lock_daemon_dispatch.c |  3 ---
 src/util/virlockspace.c| 15 +--
 src/util/virlockspace.h|  4 
 tests/virlockspacetest.c   | 29 +
 4 files changed, 10 insertions(+), 41 deletions(-)

diff --git a/src/locking/lock_daemon_dispatch.c 
b/src/locking/lock_daemon_dispatch.c
index 10248ec0b5..1b479db55d 100644
--- a/src/locking/lock_daemon_dispatch.c
+++ b/src/locking/lock_daemon_dispatch.c
@@ -50,8 +50,6 @@ virLockSpaceProtocolDispatchAcquireResource(virNetServerPtr 
server ATTRIBUTE_UNU
 virNetServerClientGetPrivateData(client);
 virLockSpacePtr lockspace;
 unsigned int newFlags;
-off_t start = 0;
-off_t len = 1;
 
 virMutexLock(&priv->lock);
 
@@ -86,7 +84,6 @@ virLockSpaceProtocolDispatchAcquireResource(virNetServerPtr 
server ATTRIBUTE_UNU
 if (virLockSpaceAcquireResource(lockspace,
 args->name,
 priv->ownerPid,
-start, len,
 newFlags) < 0)
 goto cleanup;
 
diff --git a/src/util/virlockspace.c b/src/util/virlockspace.c
index 79fa48d365..0736b4b85b 100644
--- a/src/util/virlockspace.c
+++ b/src/util/virlockspace.c
@@ -115,10 +115,8 @@ static void 
virLockSpaceResourceFree(virLockSpaceResourcePtr res)
 static virLockSpaceResourcePtr
 virLockSpaceResourceNew(virLockSpacePtr lockspace,
 const char *resname,
-pid_t owner,
-off_t start,
-off_t len,
-unsigned int flags)
+unsigned int flags,
+pid_t owner)
 {
 virLockSpaceResourcePtr res;
 bool shared = !!(flags & VIR_LOCK_SPACE_ACQUIRE_SHARED);
@@ -159,7 +157,7 @@ virLockSpaceResourceNew(virLockSpacePtr lockspace,
 goto error;
 }
 
-if (virFileLock(res->fd, shared, start, len, false) < 0) {
+if (virFileLock(res->fd, shared, 0, 1, false) < 0) {
 if (errno == EACCES || errno == EAGAIN) {
 virReportError(VIR_ERR_RESOURCE_BUSY,
_("Lockspace resource '%s' is locked"),
@@ -206,7 +204,7 @@ virLockSpaceResourceNew(virLockSpacePtr lockspace,
 goto error;
 }
 
-if (virFileLock(res->fd, shared, start, len, false) < 0) {
+if (virFileLock(res->fd, shared, 0, 1, false) < 0) {
 if (errno == EACCES || errno == EAGAIN) {
 virReportError(VIR_ERR_RESOURCE_BUSY,
_("Lockspace resource '%s' is locked"),
@@ -614,8 +612,6 @@ int virLockSpaceDeleteResource(virLockSpacePtr lockspace,
 int virLockSpaceAcquireResource(virLockSpacePtr lockspace,
 const char *resname,
 pid_t owner,
-off_t start,
-off_t len,
 unsigned int flags)
 {
 int ret = -1;
@@ -645,8 +641,7 @@ int virLockSpaceAcquireResource(virLockSpacePtr lockspace,
 goto cleanup;
 }
 
-if (!(res = virLockSpaceResourceNew(lockspace, resname,
-owner, start, len, flags)))
+if (!(res = virLockSpaceResourceNew(lockspace, resname, flags, owner)))
 goto cleanup;
 
 if (virHashAddEntry(lockspace->resources, resname, res) < 0) {
diff --git a/src/util/virlockspace.h b/src/util/virlockspace.h
index 24f2c89be6..041cf20396 100644
--- a/src/util/virlockspace.h
+++ b/src/util/virlockspace.h
@@ -22,8 +22,6 @@
 #ifndef __VIR_LOCK_SPACE_H__
 # define __VIR_LOCK_SPACE_H__
 
-# include 
-
 # include "internal.h"
 # include "virjson.h"
 
@@ -52,8 +50,6 @@ typedef enum {
 int virLockSpaceAcquireResource(virLockSpacePtr lockspace,
 const char *resname,
 pid_t owner,
-off_t start,
-off_t len,
 unsigned int flags);
 
 int virLockSpaceReleaseResource(virLockSpacePtr lockspace,
diff --git a/tests/virlockspacetest.c b/tests/virlockspacetest.c
index 3c621e7eb0..93353be285 100644
--- a/tests/virlockspacetest.c
+++ b/tests/virlockspacetest.c
@@ -98,8 +98,6 @@ static int testLockSpaceResourceLockExcl(const void *args 
ATTRIBUTE_UNUSED)
 {
 virLockSpacePtr lockspace;
 int ret = -1;
-const off_t start = 0;
-const off_t len = 1;
 
 rmdir(LOCKSPACE_DIR);
 
@@ -112,13 +110,13 @@ static int testLockSpaceResourceLockExcl(const void *args 
ATTRIBUTE_UNUSED)
 if (virLockSpaceCreateResource(lockspace, "foo") < 0)
 goto cleanup;
 
-if (virLockSpaceAcquireResource(lockspace, "foo", geteuid(), start, len

[libvirt] [PATCH v3 12/13] Revert "lock_driver_lockd: Introduce VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_METADATA flag"

2018-10-17 Thread Michal Privoznik
This reverts commit 21c34b86be5233634eb38f77be64e2263bfc4e48.

Signed-off-by: Michal Privoznik 
---
 src/locking/lock_daemon_dispatch.c | 10 ++
 src/locking/lock_driver_lockd.c|  3 +--
 src/locking/lock_driver_lockd.h|  1 -
 3 files changed, 3 insertions(+), 11 deletions(-)

diff --git a/src/locking/lock_daemon_dispatch.c 
b/src/locking/lock_daemon_dispatch.c
index a683ad3d6b..10248ec0b5 100644
--- a/src/locking/lock_daemon_dispatch.c
+++ b/src/locking/lock_daemon_dispatch.c
@@ -37,9 +37,6 @@ VIR_LOG_INIT("locking.lock_daemon_dispatch");
 
 #include "lock_daemon_dispatch_stubs.h"
 
-#define DEFAULT_OFFSET 0
-#define METADATA_OFFSET 1
-
 static int
 virLockSpaceProtocolDispatchAcquireResource(virNetServerPtr server 
ATTRIBUTE_UNUSED,
 virNetServerClientPtr client,
@@ -53,14 +50,13 @@ virLockSpaceProtocolDispatchAcquireResource(virNetServerPtr 
server ATTRIBUTE_UNU
 virNetServerClientGetPrivateData(client);
 virLockSpacePtr lockspace;
 unsigned int newFlags;
-off_t start = DEFAULT_OFFSET;
+off_t start = 0;
 off_t len = 1;
 
 virMutexLock(&priv->lock);
 
 virCheckFlagsGoto(VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_SHARED |
-  VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_AUTOCREATE |
-  VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_METADATA, 
cleanup);
+  VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_AUTOCREATE, 
cleanup);
 
 if (priv->restricted) {
 virReportError(VIR_ERR_OPERATION_DENIED, "%s",
@@ -86,8 +82,6 @@ virLockSpaceProtocolDispatchAcquireResource(virNetServerPtr 
server ATTRIBUTE_UNU
 newFlags |= VIR_LOCK_SPACE_ACQUIRE_SHARED;
 if (flags & VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_AUTOCREATE)
 newFlags |= VIR_LOCK_SPACE_ACQUIRE_AUTOCREATE;
-if (flags & VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_METADATA)
-start = METADATA_OFFSET;
 
 if (virLockSpaceAcquireResource(lockspace,
 args->name,
diff --git a/src/locking/lock_driver_lockd.c b/src/locking/lock_driver_lockd.c
index ca825e6026..16fce551c3 100644
--- a/src/locking/lock_driver_lockd.c
+++ b/src/locking/lock_driver_lockd.c
@@ -723,8 +723,7 @@ static int 
virLockManagerLockDaemonRelease(virLockManagerPtr lock,
 
 args.flags &=
 ~(VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_SHARED |
-  VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_AUTOCREATE |
-  VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_METADATA);
+  VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_AUTOCREATE);
 
 if (virNetClientProgramCall(program,
 client,
diff --git a/src/locking/lock_driver_lockd.h b/src/locking/lock_driver_lockd.h
index bebd804365..6931fe7425 100644
--- a/src/locking/lock_driver_lockd.h
+++ b/src/locking/lock_driver_lockd.h
@@ -25,7 +25,6 @@
 enum virLockSpaceProtocolAcquireResourceFlags {
 VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_SHARED = (1 << 0),
 VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_AUTOCREATE = (1 << 1),
-VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_METADATA = (1 << 2),
 };
 
 #endif /* __VIR_LOCK_DRIVER_LOCKD_H__ */
-- 
2.18.1

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


[libvirt] [PATCH v3 11/13] Revert "lock_driver: Introduce new VIR_LOCK_MANAGER_OBJECT_TYPE_DAEMON"

2018-10-17 Thread Michal Privoznik
This reverts commit 22baf6e08c65d9174b24f66370724ce961ce9576.

Signed-off-by: Michal Privoznik 
---
 src/locking/lock_driver.h |   2 -
 src/locking/lock_driver_lockd.c   | 297 +++---
 src/locking/lock_driver_sanlock.c |  37 ++--
 3 files changed, 116 insertions(+), 220 deletions(-)

diff --git a/src/locking/lock_driver.h b/src/locking/lock_driver.h
index a9d2041c30..8b7521 100644
--- a/src/locking/lock_driver.h
+++ b/src/locking/lock_driver.h
@@ -42,8 +42,6 @@ typedef enum {
 typedef enum {
 /* The managed object is a virtual guest domain */
 VIR_LOCK_MANAGER_OBJECT_TYPE_DOMAIN = 0,
-/* The managed object is a daemon (e.g. libvirtd) */
-VIR_LOCK_MANAGER_OBJECT_TYPE_DAEMON = 1,
 } virLockManagerObjectType;
 
 typedef enum {
diff --git a/src/locking/lock_driver_lockd.c b/src/locking/lock_driver_lockd.c
index 22a5a97913..ca825e6026 100644
--- a/src/locking/lock_driver_lockd.c
+++ b/src/locking/lock_driver_lockd.c
@@ -56,21 +56,10 @@ struct _virLockManagerLockDaemonResource {
 };
 
 struct _virLockManagerLockDaemonPrivate {
-virLockManagerObjectType type;
-union {
-struct {
-unsigned char uuid[VIR_UUID_BUFLEN];
-char *name;
-int id;
-pid_t pid;
-} dom;
-
-struct {
-unsigned char uuid[VIR_UUID_BUFLEN];
-char *name;
-pid_t pid;
-} daemon;
-} t;
+unsigned char uuid[VIR_UUID_BUFLEN];
+char *name;
+int id;
+pid_t pid;
 
 size_t nresources;
 virLockManagerLockDaemonResourcePtr resources;
@@ -167,30 +156,10 @@ 
virLockManagerLockDaemonConnectionRegister(virLockManagerPtr lock,
 memset(&args, 0, sizeof(args));
 
 args.flags = 0;
-
-switch (priv->type) {
-case VIR_LOCK_MANAGER_OBJECT_TYPE_DOMAIN:
-memcpy(args.owner.uuid, priv->t.dom.uuid, VIR_UUID_BUFLEN);
-args.owner.name = priv->t.dom.name;
-args.owner.id = priv->t.dom.id;
-args.owner.pid = priv->t.dom.pid;
-break;
-
-case VIR_LOCK_MANAGER_OBJECT_TYPE_DAEMON:
-memcpy(args.owner.uuid, priv->t.daemon.uuid, VIR_UUID_BUFLEN);
-args.owner.name = priv->t.daemon.name;
-args.owner.pid = priv->t.daemon.pid;
-/* This one should not be needed. However, virtlockd
- * checks for ID because not every domain has a PID. */
-args.owner.id = priv->t.daemon.pid;
-break;
-
-default:
-virReportError(VIR_ERR_INTERNAL_ERROR,
-   _("Unknown lock manager object type %d"),
-   priv->type);
-return -1;
-}
+memcpy(args.owner.uuid, priv->uuid, VIR_UUID_BUFLEN);
+args.owner.name = priv->name;
+args.owner.id = priv->id;
+args.owner.pid = priv->pid;
 
 if (virNetClientProgramCall(program,
 client,
@@ -422,18 +391,7 @@ 
virLockManagerLockDaemonPrivateFree(virLockManagerLockDaemonPrivatePtr priv)
 }
 VIR_FREE(priv->resources);
 
-switch (priv->type) {
-case VIR_LOCK_MANAGER_OBJECT_TYPE_DOMAIN:
-VIR_FREE(priv->t.dom.name);
-break;
-
-case VIR_LOCK_MANAGER_OBJECT_TYPE_DAEMON:
-VIR_FREE(priv->t.daemon.name);
-break;
-
-default:
-break;
-}
+VIR_FREE(priv->name);
 VIR_FREE(priv);
 }
 
@@ -462,82 +420,46 @@ static int virLockManagerLockDaemonNew(virLockManagerPtr 
lock,
 if (VIR_ALLOC(priv) < 0)
 return -1;
 
-priv->type = type;
-
-switch ((virLockManagerObjectType) type) {
+switch (type) {
 case VIR_LOCK_MANAGER_OBJECT_TYPE_DOMAIN:
 for (i = 0; i < nparams; i++) {
 if (STREQ(params[i].key, "uuid")) {
-memcpy(priv->t.dom.uuid, params[i].value.uuid, 
VIR_UUID_BUFLEN);
+memcpy(priv->uuid, params[i].value.uuid, VIR_UUID_BUFLEN);
 } else if (STREQ(params[i].key, "name")) {
-if (VIR_STRDUP(priv->t.dom.name, params[i].value.str) < 0)
+if (VIR_STRDUP(priv->name, params[i].value.str) < 0)
 goto cleanup;
 } else if (STREQ(params[i].key, "id")) {
-priv->t.dom.id = params[i].value.iv;
+priv->id = params[i].value.iv;
 } else if (STREQ(params[i].key, "pid")) {
-priv->t.dom.pid = params[i].value.iv;
+priv->pid = params[i].value.iv;
 } else if (STREQ(params[i].key, "uri")) {
 /* ignored */
 } else {
 virReportError(VIR_ERR_INTERNAL_ERROR,
-   _("Unexpected parameter %s for domain object"),
+   _("Unexpected parameter %s for object"),
params[i].key);
 goto cleanup;
 }
 }
-if (priv->t.dom.id == 0) {
+if (priv->id == 0) {
 virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
   

[libvirt] [PATCH v3 10/13] Revert "_virLockManagerLockDaemonPrivate: Move @hasRWDisks into dom union"

2018-10-17 Thread Michal Privoznik
This reverts commit aaf34cb9013d6d746f4edf9807408cb9dfbcf01d.

Signed-off-by: Michal Privoznik 
---
 src/locking/lock_driver_lockd.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/src/locking/lock_driver_lockd.c b/src/locking/lock_driver_lockd.c
index 268676c407..22a5a97913 100644
--- a/src/locking/lock_driver_lockd.c
+++ b/src/locking/lock_driver_lockd.c
@@ -63,8 +63,6 @@ struct _virLockManagerLockDaemonPrivate {
 char *name;
 int id;
 pid_t pid;
-
-bool hasRWDisks;
 } dom;
 
 struct {
@@ -76,6 +74,8 @@ struct _virLockManagerLockDaemonPrivate {
 
 size_t nresources;
 virLockManagerLockDaemonResourcePtr resources;
+
+bool hasRWDisks;
 };
 
 
@@ -585,7 +585,7 @@ static int 
virLockManagerLockDaemonAddResource(virLockManagerPtr lock,
 if (!driver->autoDiskLease) {
 if (!(flags & (VIR_LOCK_MANAGER_RESOURCE_SHARED |
VIR_LOCK_MANAGER_RESOURCE_READONLY)))
-priv->t.dom.hasRWDisks = true;
+priv->hasRWDisks = true;
 return 0;
 }
 
@@ -731,7 +731,7 @@ static int 
virLockManagerLockDaemonAcquire(virLockManagerPtr lock,
 
 if (priv->type == VIR_LOCK_MANAGER_OBJECT_TYPE_DOMAIN &&
 priv->nresources == 0 &&
-priv->t.dom.hasRWDisks &&
+priv->hasRWDisks &&
 driver->requireLeaseForDisks) {
 virReportError(VIR_ERR_CONFIG_UNSUPPORTED, "%s",
_("Read/write, exclusive access, disks were present, 
but no leases specified"));
-- 
2.18.1

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


[libvirt] [PATCH v3 07/13] Revert "lock_manager: Allow disabling configFile for virLockManagerPluginNew"

2018-10-17 Thread Michal Privoznik
This reverts commit 35b5b244da825fb41e35e4dc62e740d716214ec9.

Signed-off-by: Michal Privoznik 
---
 src/locking/lock_driver.h |  4 
 src/locking/lock_driver_lockd.c   |  4 +---
 src/locking/lock_driver_sanlock.c |  4 +---
 src/locking/lock_manager.c| 10 +++---
 4 files changed, 5 insertions(+), 17 deletions(-)

diff --git a/src/locking/lock_driver.h b/src/locking/lock_driver.h
index ae30abda7d..7c8f744be3 100644
--- a/src/locking/lock_driver.h
+++ b/src/locking/lock_driver.h
@@ -124,7 +124,6 @@ struct _virLockManagerParam {
 /**
  * virLockDriverInit:
  * @version: the libvirt requested plugin ABI version
- * @configFile: path to config file
  * @flags: the libvirt requested plugin optional extras
  *
  * Allow the plugin to validate the libvirt requested
@@ -132,9 +131,6 @@ struct _virLockManagerParam {
  * to block its use in versions of libvirtd which are
  * too old to support key features.
  *
- * The @configFile variable points to config file that the driver
- * should load. If NULL, no config file should be loaded.
- *
  * NB: A plugin may be loaded multiple times, for different
  * libvirt drivers (eg QEMU, LXC, UML)
  *
diff --git a/src/locking/lock_driver_lockd.c b/src/locking/lock_driver_lockd.c
index 0c672b05b1..85cdcf97be 100644
--- a/src/locking/lock_driver_lockd.c
+++ b/src/locking/lock_driver_lockd.c
@@ -371,10 +371,8 @@ static int virLockManagerLockDaemonInit(unsigned int 
version,
 driver->requireLeaseForDisks = true;
 driver->autoDiskLease = true;
 
-if (configFile &&
-virLockManagerLockDaemonLoadConfig(configFile) < 0) {
+if (virLockManagerLockDaemonLoadConfig(configFile) < 0)
 goto error;
-}
 
 if (driver->autoDiskLease) {
 if (driver->fileLockSpaceDir &&
diff --git a/src/locking/lock_driver_sanlock.c 
b/src/locking/lock_driver_sanlock.c
index 3ad0dc9bed..b10d8197c5 100644
--- a/src/locking/lock_driver_sanlock.c
+++ b/src/locking/lock_driver_sanlock.c
@@ -446,10 +446,8 @@ static int virLockManagerSanlockInit(unsigned int version,
 goto error;
 }
 
-if (configFile &&
-virLockManagerSanlockLoadConfig(driver, configFile) < 0) {
+if (virLockManagerSanlockLoadConfig(driver, configFile) < 0)
 goto error;
-}
 
 if (driver->autoDiskLease && !driver->hostID) {
 virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
diff --git a/src/locking/lock_manager.c b/src/locking/lock_manager.c
index 9067f5a01a..d421b6acfc 100644
--- a/src/locking/lock_manager.c
+++ b/src/locking/lock_manager.c
@@ -104,8 +104,6 @@ static void virLockManagerLogParams(size_t nparams,
 /**
  * virLockManagerPluginNew:
  * @name: the name of the plugin
- * @driverName: the hypervisor driver that loads the plugin
- * @configDir: path to dir where config files are stored
  * @flag: optional plugin flags
  *
  * Attempt to load the plugin $(libdir)/libvirt/lock-driver/@name.so
@@ -131,13 +129,11 @@ virLockManagerPluginPtr virLockManagerPluginNew(const 
char *name,
 char *configFile = NULL;
 
 VIR_DEBUG("name=%s driverName=%s configDir=%s flags=0x%x",
-  name, NULLSTR(driverName), NULLSTR(configDir), flags);
+  name, driverName, configDir, flags);
 
-if (driverName && configDir &&
-virAsprintf(&configFile, "%s/%s-%s.conf",
-configDir, driverName, name) < 0) {
+if (virAsprintf(&configFile, "%s/%s-%s.conf",
+configDir, driverName, name) < 0)
 return NULL;
-}
 
 if (STREQ(name, "nop")) {
 driver = &virLockDriverNop;
-- 
2.18.1

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


[libvirt] [PATCH v3 00/13] Implement alternative metadata locking

2018-10-17 Thread Michal Privoznik
v3 of:

https://www.redhat.com/archives/libvir-list/2018-October/msg00667.html

diff to v2:
- Introduced two new patches (1/13 and 2/13) so that even non-Linux
  platforms are covered
- In 4/13 I switched from indefinite wait for lock to a lock with
  timeout (of 10 seconds). This is basically to prevent us stalling if
  some app misbehaves and holds the file locked for eternity.


Michal Prívozník (13):
  virprocess: Introduce virProcessRunInFork
  virprocess: Make virProcessRunInMountNamespace use virProcessRunInFork
  security: Always spawn process for transactions
  security_manager: Rework metadata locking
  Revert "security_manager: Load lock plugin on init"
  Revert "qemu_conf: Introduce metadata_lock_manager"
  Revert "lock_manager: Allow disabling configFile for
virLockManagerPluginNew"
  Revert "lock_driver: Introduce VIR_LOCK_MANAGER_ACQUIRE_ROLLBACK"
  Revert "lock_driver: Introduce
VIR_LOCK_MANAGER_RESOURCE_TYPE_METADATA"
  Revert "_virLockManagerLockDaemonPrivate: Move @hasRWDisks into dom
union"
  Revert "lock_driver: Introduce new
VIR_LOCK_MANAGER_OBJECT_TYPE_DAEMON"
  Revert "lock_driver_lockd: Introduce
VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_METADATA flag"
  Revert "virlockspace: Allow caller to specify start and length offset
in virLockSpaceAcquireResource"

 cfg.mk |   4 +-
 src/libvirt_private.syms   |   1 +
 src/locking/lock_daemon_dispatch.c |  11 +-
 src/locking/lock_driver.h  |  12 -
 src/locking/lock_driver_lockd.c| 421 ++---
 src/locking/lock_driver_lockd.h|   1 -
 src/locking/lock_driver_sanlock.c  |  44 +--
 src/locking/lock_manager.c |  10 +-
 src/lxc/lxc_controller.c   |   3 +-
 src/lxc/lxc_driver.c   |   2 +-
 src/qemu/qemu_conf.c   |   1 -
 src/qemu/qemu_conf.h   |   1 -
 src/qemu/qemu_driver.c |   3 -
 src/security/security_dac.c|  12 +-
 src/security/security_manager.c| 250 +
 src/security/security_manager.h|  19 +-
 src/security/security_selinux.c|  11 +-
 src/util/virlockspace.c|  15 +-
 src/util/virlockspace.h|   4 -
 src/util/virprocess.c  |  69 -
 src/util/virprocess.h  |  16 ++
 tests/seclabeltest.c   |   2 +-
 tests/securityselinuxlabeltest.c   |   2 +-
 tests/securityselinuxtest.c|   2 +-
 tests/testutilsqemu.c  |   2 +-
 tests/virlockspacetest.c   |  29 +-
 26 files changed, 387 insertions(+), 560 deletions(-)

-- 
2.18.1

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list

[libvirt] [PATCH v3 06/13] Revert "qemu_conf: Introduce metadata_lock_manager"

2018-10-17 Thread Michal Privoznik
This reverts commit 8b8aefb3d6ae2139ea3d4ef6d7dd2c06f57f6075.

Signed-off-by: Michal Privoznik 
---
 src/qemu/qemu_conf.c | 1 -
 src/qemu/qemu_conf.h | 1 -
 2 files changed, 2 deletions(-)

diff --git a/src/qemu/qemu_conf.c b/src/qemu/qemu_conf.c
index 17b7e11e02..32da9a7351 100644
--- a/src/qemu/qemu_conf.c
+++ b/src/qemu/qemu_conf.c
@@ -426,7 +426,6 @@ static void virQEMUDriverConfigDispose(void *obj)
 virStringListFree(cfg->securityDriverNames);
 
 VIR_FREE(cfg->lockManagerName);
-VIR_FREE(cfg->metadataLockManagerName);
 
 virFirmwareFreeList(cfg->firmwares, cfg->nfirmwares);
 
diff --git a/src/qemu/qemu_conf.h b/src/qemu/qemu_conf.h
index f876f9117c..49e5c50f22 100644
--- a/src/qemu/qemu_conf.h
+++ b/src/qemu/qemu_conf.h
@@ -186,7 +186,6 @@ struct _virQEMUDriverConfig {
 bool autoStartBypassCache;
 
 char *lockManagerName;
-char *metadataLockManagerName;
 
 int keepAliveInterval;
 unsigned int keepAliveCount;
-- 
2.18.1

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


[libvirt] [PATCH v3 08/13] Revert "lock_driver: Introduce VIR_LOCK_MANAGER_ACQUIRE_ROLLBACK"

2018-10-17 Thread Michal Privoznik
This reverts commit 385eb8399bdb1610447c2857abfe99cee4a9fb9e.

Signed-off-by: Michal Privoznik 
---
 src/locking/lock_driver.h   |  4 --
 src/locking/lock_driver_lockd.c | 82 ++---
 2 files changed, 24 insertions(+), 62 deletions(-)

diff --git a/src/locking/lock_driver.h b/src/locking/lock_driver.h
index 7c8f744be3..9be0abcfba 100644
--- a/src/locking/lock_driver.h
+++ b/src/locking/lock_driver.h
@@ -67,10 +67,6 @@ typedef enum {
 VIR_LOCK_MANAGER_ACQUIRE_REGISTER_ONLY = (1 << 0),
 /* Prevent further lock/unlock calls from this process */
 VIR_LOCK_MANAGER_ACQUIRE_RESTRICT = (1 << 1),
-/* Used when acquiring more resources in which one of them
- * can't be acquired, perform a rollback and release all
- * resources acquired so far. */
-VIR_LOCK_MANAGER_ACQUIRE_ROLLBACK = (1 << 2),
 } virLockManagerAcquireFlags;
 
 typedef enum {
diff --git a/src/locking/lock_driver_lockd.c b/src/locking/lock_driver_lockd.c
index 85cdcf97be..d6551e125c 100644
--- a/src/locking/lock_driver_lockd.c
+++ b/src/locking/lock_driver_lockd.c
@@ -735,34 +735,6 @@ static int 
virLockManagerLockDaemonAddResource(virLockManagerPtr lock,
 }
 
 
-static int virLockManagerLockDaemonReleaseImpl(virNetClientPtr client,
-   virNetClientProgramPtr program,
-   int counter,
-   
virLockManagerLockDaemonResourcePtr res)
-{
-virLockSpaceProtocolReleaseResourceArgs args;
-
-memset(&args, 0, sizeof(args));
-
-args.path = res->lockspace;
-args.name = res->name;
-args.flags = res->flags;
-
-args.flags &=
-~(VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_SHARED |
-  VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_AUTOCREATE |
-  VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_METADATA);
-
-return virNetClientProgramCall(program,
-   client,
-   counter,
-   
VIR_LOCK_SPACE_PROTOCOL_PROC_RELEASE_RESOURCE,
-   0, NULL, NULL, NULL,
-   
(xdrproc_t)xdr_virLockSpaceProtocolReleaseResourceArgs, &args,
-   (xdrproc_t)xdr_void, NULL);
-}
-
-
 static int virLockManagerLockDaemonAcquire(virLockManagerPtr lock,
const char *state ATTRIBUTE_UNUSED,
unsigned int flags,
@@ -773,13 +745,10 @@ static int 
virLockManagerLockDaemonAcquire(virLockManagerPtr lock,
 virNetClientProgramPtr program = NULL;
 int counter = 0;
 int rv = -1;
-ssize_t i;
-ssize_t lastGood = -1;
 virLockManagerLockDaemonPrivatePtr priv = lock->privateData;
 
 virCheckFlags(VIR_LOCK_MANAGER_ACQUIRE_REGISTER_ONLY |
-  VIR_LOCK_MANAGER_ACQUIRE_RESTRICT |
-  VIR_LOCK_MANAGER_ACQUIRE_ROLLBACK, -1);
+  VIR_LOCK_MANAGER_ACQUIRE_RESTRICT, -1);
 
 if (priv->type == VIR_LOCK_MANAGER_OBJECT_TYPE_DOMAIN &&
 priv->nresources == 0 &&
@@ -798,6 +767,7 @@ static int 
virLockManagerLockDaemonAcquire(virLockManagerPtr lock,
 goto cleanup;
 
 if (!(flags & VIR_LOCK_MANAGER_ACQUIRE_REGISTER_ONLY)) {
+size_t i;
 for (i = 0; i < priv->nresources; i++) {
 virLockSpaceProtocolAcquireResourceArgs args;
 
@@ -815,7 +785,6 @@ static int 
virLockManagerLockDaemonAcquire(virLockManagerPtr lock,
 
(xdrproc_t)xdr_virLockSpaceProtocolAcquireResourceArgs, &args,
 (xdrproc_t)xdr_void, NULL) < 0)
 goto cleanup;
-lastGood = i;
 }
 }
 
@@ -826,28 +795,8 @@ static int 
virLockManagerLockDaemonAcquire(virLockManagerPtr lock,
 rv = 0;
 
  cleanup:
-if (rv < 0) {
-int saved_errno = errno;
-virErrorPtr origerr;
-
-virErrorPreserveLast(&origerr);
-if (fd)
-VIR_FORCE_CLOSE(*fd);
-
-if (flags & VIR_LOCK_MANAGER_ACQUIRE_ROLLBACK) {
-for (i = lastGood; i >= 0; i--) {
-virLockManagerLockDaemonResourcePtr res = &priv->resources[i];
-
-if (virLockManagerLockDaemonReleaseImpl(client, program,
-counter++, res) < 0)
-VIR_WARN("Unable to release resource lockspace=%s name=%s",
- res->lockspace, res->name);
-}
-}
-
-virErrorRestore(&origerr);
-errno = saved_errno;
-}
+if (rv != 0 && fd)
+VIR_FORCE_CLOSE(*fd);
 virNetClientClose(client);
 virObjectUnref(client);
 virObjectUnref(program);
@@ -875,10 +824,27 @@ static int 
virLockManagerLockDaemonRelease(virLockManagerPtr lock,
 goto cleanup;
 
 for (i = 0; i < priv->nresources; i++) {
-

[libvirt] [PATCH v3 04/13] security_manager: Rework metadata locking

2018-10-17 Thread Michal Privoznik
Trying to use virlockd to lock metadata turns out to be too big
gun. Since we will always spawn a separate process for relabeling
we are safe to use thread unsafe POSIX locks and take out
virtlockd completely out of the picture.

Signed-off-by: Michal Privoznik 
---
 src/security/security_dac.c |  10 +-
 src/security/security_manager.c | 225 +---
 src/security/security_manager.h |  17 ++-
 src/security/security_selinux.c |   9 +-
 4 files changed, 139 insertions(+), 122 deletions(-)

diff --git a/src/security/security_dac.c b/src/security/security_dac.c
index 580d800bb1..ad2561a241 100644
--- a/src/security/security_dac.c
+++ b/src/security/security_dac.c
@@ -204,6 +204,7 @@ virSecurityDACTransactionRun(pid_t pid ATTRIBUTE_UNUSED,
  void *opaque)
 {
 virSecurityDACChownListPtr list = opaque;
+virSecurityManagerMetadataLockStatePtr state;
 const char **paths = NULL;
 size_t npaths = 0;
 size_t i;
@@ -216,14 +217,10 @@ virSecurityDACTransactionRun(pid_t pid ATTRIBUTE_UNUSED,
 for (i = 0; i < list->nItems; i++) {
 const char *p = list->items[i]->path;
 
-if (!p ||
-virFileIsDir(p))
-continue;
-
 VIR_APPEND_ELEMENT_COPY_INPLACE(paths, npaths, p);
 }
 
-if (virSecurityManagerMetadataLock(list->manager, paths, npaths) < 0)
+if (!(state = virSecurityManagerMetadataLock(list->manager, paths, 
npaths)))
 goto cleanup;
 
 for (i = 0; i < list->nItems; i++) {
@@ -246,8 +243,7 @@ virSecurityDACTransactionRun(pid_t pid ATTRIBUTE_UNUSED,
 break;
 }
 
-if (virSecurityManagerMetadataUnlock(list->manager, paths, npaths) < 0)
-goto cleanup;
+virSecurityManagerMetadataUnlock(list->manager, &state);
 
 if (rv < 0)
 goto cleanup;
diff --git a/src/security/security_manager.c b/src/security/security_manager.c
index c6c80e6165..b675f8ab46 100644
--- a/src/security/security_manager.c
+++ b/src/security/security_manager.c
@@ -21,6 +21,10 @@
  */
 #include 
 
+#include 
+#include 
+#include 
+
 #include "security_driver.h"
 #include "security_stack.h"
 #include "security_dac.h"
@@ -30,14 +34,11 @@
 #include "virlog.h"
 #include "locking/lock_manager.h"
 #include "virfile.h"
-#include "virtime.h"
 
 #define VIR_FROM_THIS VIR_FROM_SECURITY
 
 VIR_LOG_INIT("security.security_manager");
 
-virMutex lockManagerMutex = VIR_MUTEX_INITIALIZER;
-
 struct _virSecurityManager {
 virObjectLockable parent;
 
@@ -47,10 +48,6 @@ struct _virSecurityManager {
 void *privateData;
 
 virLockManagerPluginPtr lockPlugin;
-/* This is a FD that represents a connection to virtlockd so
- * that connection is kept open in between MetdataLock() and
- * MetadataUnlock() calls. */
-int clientfd;
 };
 
 static virClassPtr virSecurityManagerClass;
@@ -66,7 +63,6 @@ void virSecurityManagerDispose(void *obj)
 mgr->drv->close(mgr);
 
 virObjectUnref(mgr->lockPlugin);
-VIR_FORCE_CLOSE(mgr->clientfd);
 
 VIR_FREE(mgr->privateData);
 }
@@ -119,7 +115,6 @@ virSecurityManagerNewDriver(virSecurityDriverPtr drv,
 mgr->flags = flags;
 mgr->virtDriver = virtDriver;
 VIR_STEAL_PTR(mgr->privateData, privateData);
-mgr->clientfd = -1;
 
 if (drv->open(mgr) < 0)
 goto error;
@@ -1276,129 +1271,153 @@ 
virSecurityManagerRestoreTPMLabels(virSecurityManagerPtr mgr,
 }
 
 
-static virLockManagerPtr
-virSecurityManagerNewLockManager(virSecurityManagerPtr mgr,
- const char * const *paths,
- size_t npaths)
+struct _virSecurityManagerMetadataLockState {
+size_t nfds;
+int *fds;
+};
+
+
+static int
+cmpstringp(const void *p1, const void *p2)
 {
-virLockManagerPtr lock;
-virLockManagerParam params[] = {
-{ .type = VIR_LOCK_MANAGER_PARAM_TYPE_UUID,
-.key = "uuid",
-},
-{ .type = VIR_LOCK_MANAGER_PARAM_TYPE_STRING,
-.key = "name",
-.value = { .cstr = "libvirtd-sec" },
-},
-{ .type = VIR_LOCK_MANAGER_PARAM_TYPE_UINT,
-.key = "pid",
-.value = { .iv = getpid() },
-},
-};
-const unsigned int flags = 0;
-size_t i;
+const char *s1 = *(char * const *) p1;
+const char *s2 = *(char * const *) p2;
 
-if (virGetHostUUID(params[0].value.uuid) < 0)
-return NULL;
+if (!s1 && !s2)
+return 0;
 
-if (!(lock = 
virLockManagerNew(virLockManagerPluginGetDriver(mgr->lockPlugin),
-   VIR_LOCK_MANAGER_OBJECT_TYPE_DAEMON,
-   ARRAY_CARDINALITY(params),
-   params,
-   flags)))
-return NULL;
+if (!s1 || !s2)
+return s2 ? -1 : 1;
 
-for (i = 0; i < npaths; i++) {
-if (virLockManagerAddResource(lock,
-  VIR_LOCK_MANAGER_RE

[libvirt] [PATCH v3 05/13] Revert "security_manager: Load lock plugin on init"

2018-10-17 Thread Michal Privoznik
This reverts commit 3e26b476b5f322353bf0dcd8e3f037ca672b8c62.

Signed-off-by: Michal Privoznik 
---
 cfg.mk   |  4 +---
 src/lxc/lxc_controller.c |  3 +--
 src/lxc/lxc_driver.c |  2 +-
 src/qemu/qemu_driver.c   |  3 ---
 src/security/security_manager.c  | 25 +
 src/security/security_manager.h  |  2 --
 tests/seclabeltest.c |  2 +-
 tests/securityselinuxlabeltest.c |  2 +-
 tests/securityselinuxtest.c  |  2 +-
 tests/testutilsqemu.c|  2 +-
 10 files changed, 8 insertions(+), 39 deletions(-)

diff --git a/cfg.mk b/cfg.mk
index 4790d0b7e7..eddd110ed6 100644
--- a/cfg.mk
+++ b/cfg.mk
@@ -787,10 +787,8 @@ sc_prohibit_cross_inclusion:
  case $$dir in \
util/) safe="util";; \
access/ | conf/) safe="($$dir|conf|util)";; \
-   cpu/| network/| node_device/| rpc/| storage/) \
+   cpu/| network/| node_device/| rpc/| security/| storage/) \
  safe="($$dir|util|conf|storage)";; \
-   security/) \
- safe="($$dir|util|conf|storage|locking)";; \
xenapi/ | xenconfig/ ) safe="($$dir|util|conf|xen|cpu)";; \
*) safe="($$dir|$(mid_dirs)|util)";; \
  esac; \
diff --git a/src/lxc/lxc_controller.c b/src/lxc/lxc_controller.c
index 62dfd09473..e853d02d65 100644
--- a/src/lxc/lxc_controller.c
+++ b/src/lxc/lxc_controller.c
@@ -2624,8 +2624,7 @@ int main(int argc, char *argv[])
 ctrl->handshakeFd = handshakeFd;
 
 if (!(ctrl->securityManager = virSecurityManagerNew(securityDriver,
-LXC_DRIVER_NAME,
-NULL, 0)))
+LXC_DRIVER_NAME, 0)))
 goto cleanup;
 
 if (ctrl->def->seclabels) {
diff --git a/src/lxc/lxc_driver.c b/src/lxc/lxc_driver.c
index f732305649..990871d9b3 100644
--- a/src/lxc/lxc_driver.c
+++ b/src/lxc/lxc_driver.c
@@ -1531,7 +1531,7 @@ lxcSecurityInit(virLXCDriverConfigPtr cfg)
 flags |= VIR_SECURITY_MANAGER_REQUIRE_CONFINED;
 
 virSecurityManagerPtr mgr = virSecurityManagerNew(cfg->securityDriverName,
-  LXC_DRIVER_NAME, NULL, 
flags);
+  LXC_DRIVER_NAME, flags);
 if (!mgr)
 goto error;
 
diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
index a52e2495d5..3a1185f947 100644
--- a/src/qemu/qemu_driver.c
+++ b/src/qemu/qemu_driver.c
@@ -350,7 +350,6 @@ qemuSecurityInit(virQEMUDriverPtr driver)
 while (names && *names) {
 if (!(mgr = qemuSecurityNew(*names,
 QEMU_DRIVER_NAME,
-cfg->metadataLockManagerName,
 flags)))
 goto error;
 if (!stack) {
@@ -366,7 +365,6 @@ qemuSecurityInit(virQEMUDriverPtr driver)
 } else {
 if (!(mgr = qemuSecurityNew(NULL,
 QEMU_DRIVER_NAME,
-cfg->metadataLockManagerName,
 flags)))
 goto error;
 if (!(stack = qemuSecurityNewStack(mgr)))
@@ -383,7 +381,6 @@ qemuSecurityInit(virQEMUDriverPtr driver)
cfg->user,
cfg->group,
flags,
-   cfg->metadataLockManagerName,
qemuSecurityChownCallback)))
 goto error;
 if (!stack) {
diff --git a/src/security/security_manager.c b/src/security/security_manager.c
index b675f8ab46..47be47df48 100644
--- a/src/security/security_manager.c
+++ b/src/security/security_manager.c
@@ -32,7 +32,6 @@
 #include "viralloc.h"
 #include "virobject.h"
 #include "virlog.h"
-#include "locking/lock_manager.h"
 #include "virfile.h"
 
 #define VIR_FROM_THIS VIR_FROM_SECURITY
@@ -46,8 +45,6 @@ struct _virSecurityManager {
 unsigned int flags;
 const char *virtDriver;
 void *privateData;
-
-virLockManagerPluginPtr lockPlugin;
 };
 
 static virClassPtr virSecurityManagerClass;
@@ -58,12 +55,8 @@ void virSecurityManagerDispose(void *obj)
 {
 virSecurityManagerPtr mgr = obj;
 
-if (mgr->drv &&
-mgr->drv->close)
+if (mgr->drv->close)
 mgr->drv->close(mgr);
-
-virObjectUnref(mgr->lockPlugin);
-
 VIR_FREE(mgr->privateData);
 }
 
@@ -83,7 +76,6 @@ VIR_ONCE_GLOBAL_INIT(virSecurityManager);
 static virSecurityManagerPtr
 virSecurityManagerNewDriver(virSecurityDriverPtr drv,
 const char *virtDriver,
-const char *lockManagerPluginName,
 unsigned int flags)
 {
 virSecurityManagerPtr mgr = NULL;
@@ -103,14 +95,6 @@ virSecurit

[libvirt] [PATCH v3 2/6] vfio/mdev: Add "aggregation" attribute for supported mdev type

2018-10-17 Thread Zhenyu Wang
For supported mdev driver to create aggregated device, this creates
new "aggregation" attribute for target type, which will show maximum
number of instance resources that can be aggregated.

Cc: Kirti Wankhede 
Cc: Alex Williamson 
Cc: Kevin Tian 
Cc: Cornelia Huck 
Signed-off-by: Zhenyu Wang 
---
 drivers/vfio/mdev/mdev_core.c| 19 +++
 drivers/vfio/mdev/mdev_private.h |  2 ++
 drivers/vfio/mdev/mdev_sysfs.c   | 22 ++
 include/linux/mdev.h |  8 
 4 files changed, 51 insertions(+)

diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c
index 545c52ec7618..8f8bbb72e5d9 100644
--- a/drivers/vfio/mdev/mdev_core.c
+++ b/drivers/vfio/mdev/mdev_core.c
@@ -161,6 +161,25 @@ static int mdev_device_remove_cb(struct device *dev, void 
*data)
return mdev_device_remove(dev, data ? *(bool *)data : true);
 }
 
+int mdev_max_aggregated_instances(struct kobject *kobj, struct device *dev,
+ unsigned int *m)
+{
+   struct mdev_parent *parent;
+   struct mdev_type *type = to_mdev_type(kobj);
+   int ret;
+
+   parent = mdev_get_parent(type->parent);
+   if (!parent)
+   return -EINVAL;
+
+   if (parent->ops->max_aggregated_instances) {
+   ret = parent->ops->max_aggregated_instances(kobj, dev, m);
+   } else
+   ret = -EINVAL;
+   mdev_put_parent(parent);
+   return ret;
+}
+
 /*
  * mdev_register_device : Register a device
  * @dev: device structure representing parent device.
diff --git a/drivers/vfio/mdev/mdev_private.h b/drivers/vfio/mdev/mdev_private.h
index e90d295d3927..f1289db75884 100644
--- a/drivers/vfio/mdev/mdev_private.h
+++ b/drivers/vfio/mdev/mdev_private.h
@@ -63,4 +63,6 @@ int  mdev_device_create(struct kobject *kobj, struct device 
*dev, uuid_le uuid,
unsigned int instances);
 int  mdev_device_remove(struct device *dev, bool force_remove);
 
+int  mdev_max_aggregated_instances(struct kobject *kobj, struct device *dev,
+  unsigned int *m);
 #endif /* MDEV_PRIVATE_H */
diff --git a/drivers/vfio/mdev/mdev_sysfs.c b/drivers/vfio/mdev/mdev_sysfs.c
index aefed0c8891b..a329d6ab6cb9 100644
--- a/drivers/vfio/mdev/mdev_sysfs.c
+++ b/drivers/vfio/mdev/mdev_sysfs.c
@@ -103,6 +103,18 @@ static ssize_t create_store(struct kobject *kobj, struct 
device *dev,
 
 MDEV_TYPE_ATTR_WO(create);
 
+static ssize_t
+aggregation_show(struct kobject *kobj, struct device *dev, char *buf)
+{
+   unsigned int m;
+
+   if (mdev_max_aggregated_instances(kobj, dev, &m))
+   return sprintf(buf, "1\n");
+   else
+   return sprintf(buf, "%u\n", m);
+}
+MDEV_TYPE_ATTR_RO(aggregation);
+
 static void mdev_type_release(struct kobject *kobj)
 {
struct mdev_type *type = to_mdev_type(kobj);
@@ -145,6 +157,14 @@ struct mdev_type *add_mdev_supported_type(struct 
mdev_parent *parent,
if (ret)
goto attr_create_failed;
 
+   if (parent->ops->create_with_instances &&
+   parent->ops->max_aggregated_instances) {
+   ret = sysfs_create_file(&type->kobj,
+   &mdev_type_attr_aggregation.attr);
+   if (ret)
+   goto attr_aggregate_failed;
+   }
+
type->devices_kobj = kobject_create_and_add("devices", &type->kobj);
if (!type->devices_kobj) {
ret = -ENOMEM;
@@ -165,6 +185,8 @@ struct mdev_type *add_mdev_supported_type(struct 
mdev_parent *parent,
 attrs_failed:
kobject_put(type->devices_kobj);
 attr_devices_failed:
+   sysfs_remove_file(&type->kobj, &mdev_type_attr_aggregation.attr);
+attr_aggregate_failed:
sysfs_remove_file(&type->kobj, &mdev_type_attr_create.attr);
 attr_create_failed:
kobject_del(&type->kobj);
diff --git a/include/linux/mdev.h b/include/linux/mdev.h
index c12c0bfc5598..66cfdb0bf0d6 100644
--- a/include/linux/mdev.h
+++ b/include/linux/mdev.h
@@ -39,6 +39,11 @@ struct mdev_device;
  *   that is being created
  *  @instances: number of instances to aggregate
  * Returns integer: success (0) or error (< 0)
+ * @max_aggregated_instances: Return max number for aggregated resources
+ * @kobj: kobject of type
+ *  @dev: mdev parent device for target type
+ *  @max: return max number of instances which can 
aggregate
+ * Returns integer: success (0) or error (< 0)
  * @remove:Called to free resources in parent device's driver for a
  * a mediated device. It is mandatory to provide 'remove'
  * ops.
@@ -82,6 +87,9 @@ struct mdev_parent_ops {
int (*create_with_instances)(struct kobject *kobj,
 struct mdev_device *mdev,
  

[libvirt] [PATCH v3 09/13] Revert "lock_driver: Introduce VIR_LOCK_MANAGER_RESOURCE_TYPE_METADATA"

2018-10-17 Thread Michal Privoznik
This reverts commit 997283b54b0e1f599aed3085ceba027eb8110acb.

Signed-off-by: Michal Privoznik 
---
 src/locking/lock_driver.h |  2 --
 src/locking/lock_driver_lockd.c   | 47 +--
 src/locking/lock_driver_sanlock.c |  3 +-
 3 files changed, 14 insertions(+), 38 deletions(-)

diff --git a/src/locking/lock_driver.h b/src/locking/lock_driver.h
index 9be0abcfba..a9d2041c30 100644
--- a/src/locking/lock_driver.h
+++ b/src/locking/lock_driver.h
@@ -51,8 +51,6 @@ typedef enum {
 VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK = 0,
 /* A lease against an arbitrary resource */
 VIR_LOCK_MANAGER_RESOURCE_TYPE_LEASE = 1,
-/* The resource to be locked is a metadata */
-VIR_LOCK_MANAGER_RESOURCE_TYPE_METADATA = 2,
 } virLockManagerResourceType;
 
 typedef enum {
diff --git a/src/locking/lock_driver_lockd.c b/src/locking/lock_driver_lockd.c
index d6551e125c..268676c407 100644
--- a/src/locking/lock_driver_lockd.c
+++ b/src/locking/lock_driver_lockd.c
@@ -563,7 +563,7 @@ static int 
virLockManagerLockDaemonAddResource(virLockManagerPtr lock,
 virLockManagerLockDaemonPrivatePtr priv = lock->privateData;
 char *newName = NULL;
 char *newLockspace = NULL;
-int newFlags = 0;
+bool autoCreate = false;
 int ret = -1;
 
 virCheckFlags(VIR_LOCK_MANAGER_RESOURCE_READONLY |
@@ -575,7 +575,7 @@ static int 
virLockManagerLockDaemonAddResource(virLockManagerPtr lock,
 switch (priv->type) {
 case VIR_LOCK_MANAGER_OBJECT_TYPE_DOMAIN:
 
-switch ((virLockManagerResourceType) type) {
+switch (type) {
 case VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK:
 if (params || nparams) {
 virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
@@ -602,7 +602,7 @@ static int 
virLockManagerLockDaemonAddResource(virLockManagerPtr lock,
 VIR_DEBUG("Got an LVM UUID %s for %s", newName, name);
 if (VIR_STRDUP(newLockspace, driver->lvmLockSpaceDir) < 0)
 goto cleanup;
-newFlags |= 
VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_AUTOCREATE;
+autoCreate = true;
 break;
 }
 virResetLastError();
@@ -619,7 +619,7 @@ static int 
virLockManagerLockDaemonAddResource(virLockManagerPtr lock,
 VIR_DEBUG("Got an SCSI ID %s for %s", newName, name);
 if (VIR_STRDUP(newLockspace, driver->scsiLockSpaceDir) < 0)
 goto cleanup;
-newFlags |= 
VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_AUTOCREATE;
+autoCreate = true;
 break;
 }
 virResetLastError();
@@ -631,7 +631,7 @@ static int 
virLockManagerLockDaemonAddResource(virLockManagerPtr lock,
 goto cleanup;
 if (virCryptoHashString(VIR_CRYPTO_HASH_SHA256, name, 
&newName) < 0)
 goto cleanup;
-newFlags |= 
VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_AUTOCREATE;
+autoCreate = true;
 VIR_DEBUG("Using indirect lease %s for %s", newName, name);
 } else {
 if (VIR_STRDUP(newLockspace, "") < 0)
@@ -676,8 +676,6 @@ static int 
virLockManagerLockDaemonAddResource(virLockManagerPtr lock,
 goto cleanup;
 
 }   break;
-
-case VIR_LOCK_MANAGER_RESOURCE_TYPE_METADATA:
 default:
 virReportError(VIR_ERR_INTERNAL_ERROR,
_("Unknown lock manager object type %d for domain 
lock object"),
@@ -687,29 +685,6 @@ static int 
virLockManagerLockDaemonAddResource(virLockManagerPtr lock,
 break;
 
 case VIR_LOCK_MANAGER_OBJECT_TYPE_DAEMON:
-switch ((virLockManagerResourceType) type) {
-case VIR_LOCK_MANAGER_RESOURCE_TYPE_METADATA:
-if (params || nparams) {
-virReportError(VIR_ERR_INTERNAL_ERROR, "%s",
-   _("Unexpected parameters for metadata 
resource"));
-goto cleanup;
-}
-if (VIR_STRDUP(newLockspace, "") < 0 ||
-VIR_STRDUP(newName, name) < 0)
-goto cleanup;
-newFlags |= VIR_LOCK_SPACE_PROTOCOL_ACQUIRE_RESOURCE_METADATA;
-break;
-
-case VIR_LOCK_MANAGER_RESOURCE_TYPE_DISK:
-case VIR_LOCK_MANAGER_RESOURCE_TYPE_LEASE:
-default:
-virReportError(VIR_ERR_INTERNAL_ERROR,
-   _("Unknown lock manager object type %d for daemon 
lock object"),
-   type);
-goto cleanup;
-}
-break;
-
 default:
 virReportError(VIR_ERR_INTERNAL_ERROR,
_("Unknown lock manager object type %d"),
@@ -717,15 +692,19 @@ static int 
virLockManagerLockDaemonAddResource(virLockManagerPtr lock,
 goto cleanup;
 }
 
-if (flags & VIR_LOCK_MANAGER_R

[libvirt] [PATCH v3 01/13] virprocess: Introduce virProcessRunInFork

2018-10-17 Thread Michal Privoznik
This new helper can be used to spawn a child process and run
passed callback from it. This will come handy esp. if the
callback is not thread safe.

Signed-off-by: Michal Privoznik 
---
 src/libvirt_private.syms |  1 +
 src/util/virprocess.c| 81 
 src/util/virprocess.h| 16 
 3 files changed, 98 insertions(+)

diff --git a/src/libvirt_private.syms b/src/libvirt_private.syms
index 335210c31d..e2dc85310a 100644
--- a/src/libvirt_private.syms
+++ b/src/libvirt_private.syms
@@ -2625,6 +2625,7 @@ virProcessKill;
 virProcessKillPainfully;
 virProcessKillPainfullyDelay;
 virProcessNamespaceAvailable;
+virProcessRunInFork;
 virProcessRunInMountNamespace;
 virProcessSchedPolicyTypeFromString;
 virProcessSchedPolicyTypeToString;
diff --git a/src/util/virprocess.c b/src/util/virprocess.c
index 3883c708fc..51b9ccb1bb 100644
--- a/src/util/virprocess.c
+++ b/src/util/virprocess.c
@@ -1165,6 +1165,87 @@ virProcessRunInMountNamespace(pid_t pid,
 }
 
 
+static int
+virProcessForkHelper(int errfd,
+ pid_t pid,
+ virProcessForkCallback cb,
+ void *opaque)
+{
+if (cb(pid, opaque) < 0) {
+virErrorPtr err = virGetLastError();
+if (err) {
+size_t len = strlen(err->message) + 1;
+ignore_value(safewrite(errfd, err->message, len));
+}
+
+return -1;
+}
+
+return 0;
+}
+
+
+/**
+ * virProcessRunInFork:
+ * @cb: callback to run
+ * @opaque: opaque data to @cb
+ *
+ * Do the fork and run @cb in the child. This can be used when
+ * @cb does something thread unsafe, for instance. However, due
+ * to possible signal propagation @cb should only call async
+ * signal safe functions. On return, the returned value is either
+ * -1 with error message reported if something went bad in the
+ *  parent, if child has died from a signal or if the child
+ *  returned EXIT_CANCELED. Otherwise the returned value is the
+ *  exit status of the child.
+ */
+int
+virProcessRunInFork(virProcessForkCallback cb,
+void *opaque)
+{
+int ret = -1;
+pid_t child = -1;
+pid_t parent = getpid();
+int errfd[2] = { -1, -1 };
+
+if (pipe2(errfd, O_CLOEXEC) < 0) {
+virReportSystemError(errno, "%s",
+ _("Cannot create pipe for child"));
+return -1;
+}
+
+if ((child = virFork()) < 0)
+goto cleanup;
+
+if (child == 0) {
+VIR_FORCE_CLOSE(errfd[0]);
+ret = virProcessForkHelper(errfd[1], parent, cb, opaque);
+VIR_FORCE_CLOSE(errfd[1]);
+_exit(ret < 0 ? EXIT_CANCELED : ret);
+} else {
+int status;
+VIR_AUTOFREE(char *) buf = NULL;
+
+VIR_FORCE_CLOSE(errfd[1]);
+ignore_value(virFileReadHeaderFD(errfd[0], 1024, &buf));
+ret = virProcessWait(child, &status, false);
+if (!ret) {
+ret = status == EXIT_CANCELED ? -1 : status;
+if (ret) {
+virReportError(VIR_ERR_INTERNAL_ERROR,
+   _("child reported: %s"),
+   NULLSTR(buf));
+}
+}
+}
+
+ cleanup:
+VIR_FORCE_CLOSE(errfd[0]);
+VIR_FORCE_CLOSE(errfd[1]);
+return ret;
+}
+
+
 #if defined(HAVE_SYS_MOUNT_H) && defined(HAVE_UNSHARE)
 int
 virProcessSetupPrivateMountNS(void)
diff --git a/src/util/virprocess.h b/src/util/virprocess.h
index 5faa0892fe..1bae8c9212 100644
--- a/src/util/virprocess.h
+++ b/src/util/virprocess.h
@@ -93,6 +93,22 @@ int virProcessRunInMountNamespace(pid_t pid,
   virProcessNamespaceCallback cb,
   void *opaque);
 
+/**
+ * virProcessForkCallback:
+ * @pid: parent's pid
+ * @opaque: opaque data
+ *
+ * Callback to run in fork()-ed process.
+ *
+ * Returns: 0 on success,
+ * -1 on error (treated as EXIT_CANCELED)
+ */
+typedef int (*virProcessForkCallback)(pid_t pid,
+  void *opaque);
+
+int virProcessRunInFork(virProcessForkCallback cb,
+void *opaque);
+
 int virProcessSetupPrivateMountNS(void);
 
 int virProcessSetScheduler(pid_t pid,
-- 
2.18.1

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


[libvirt] [PATCH v3 03/13] security: Always spawn process for transactions

2018-10-17 Thread Michal Privoznik
In the next commit the virSecurityManagerMetadataLock() is going
to be turned thread unsafe. Therefore, we have to spawn a
separate process for it. Always.

Signed-off-by: Michal Privoznik 
---
 src/security/security_dac.c | 2 +-
 src/security/security_selinux.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/security/security_dac.c b/src/security/security_dac.c
index da4a6c72fe..580d800bb1 100644
--- a/src/security/security_dac.c
+++ b/src/security/security_dac.c
@@ -563,7 +563,7 @@ virSecurityDACTransactionCommit(virSecurityManagerPtr mgr 
ATTRIBUTE_UNUSED,
 }
 
 if ((pid == -1 &&
- virSecurityDACTransactionRun(pid, list) < 0) ||
+ virProcessRunInFork(virSecurityDACTransactionRun, list) < 0) ||
 (pid != -1 &&
  virProcessRunInMountNamespace(pid,
virSecurityDACTransactionRun,
diff --git a/src/security/security_selinux.c b/src/security/security_selinux.c
index 467d1e6bfe..0e24b9b3ca 100644
--- a/src/security/security_selinux.c
+++ b/src/security/security_selinux.c
@@ -1104,7 +1104,7 @@ virSecuritySELinuxTransactionCommit(virSecurityManagerPtr 
mgr ATTRIBUTE_UNUSED,
 }
 
 if ((pid == -1 &&
- virSecuritySELinuxTransactionRun(pid, list) < 0) ||
+ virProcessRunInFork(virSecuritySELinuxTransactionRun, list) < 0) ||
 (pid != -1 &&
  virProcessRunInMountNamespace(pid,
virSecuritySELinuxTransactionRun,
-- 
2.18.1

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


[libvirt] [PATCH v3 02/13] virprocess: Make virProcessRunInMountNamespace use virProcessRunInFork

2018-10-17 Thread Michal Privoznik
Both virProcessRunInMountNamespace() and virProcessRunInFork()
look very similar. De-duplicate the code and make
virProcessRunInMountNamespace() call virProcessRunInFork().

Signed-off-by: Michal Privoznik 
---
 src/util/virprocess.c | 62 +--
 1 file changed, 12 insertions(+), 50 deletions(-)

diff --git a/src/util/virprocess.c b/src/util/virprocess.c
index 51b9ccb1bb..3304879212 100644
--- a/src/util/virprocess.c
+++ b/src/util/virprocess.c
@@ -1073,11 +1073,17 @@ int virProcessGetStartTime(pid_t pid,
 #endif
 
 
-static int virProcessNamespaceHelper(int errfd,
- pid_t pid,
- virProcessNamespaceCallback cb,
+typedef struct _virProcessNamespaceHelperData virProcessNamespaceHelperData;
+struct _virProcessNamespaceHelperData {
+pid_t pid;
+virProcessNamespaceCallback cb;
+void *opaque;
+};
+
+static int virProcessNamespaceHelper(pid_t pid ATTRIBUTE_UNUSED,
  void *opaque)
 {
+virProcessNamespaceHelperData *data = opaque;
 int fd = -1;
 int ret = -1;
 VIR_AUTOFREE(char *) path = NULL;
@@ -1097,16 +1103,9 @@ static int virProcessNamespaceHelper(int errfd,
 goto cleanup;
 }
 
-ret = cb(pid, opaque);
+ret = data->cb(data->pid, data->opaque);
 
  cleanup:
-if (ret < 0) {
-virErrorPtr err = virGetLastError();
-if (err) {
-size_t len = strlen(err->message) + 1;
-ignore_value(safewrite(errfd, err->message, len));
-}
-}
 VIR_FORCE_CLOSE(fd);
 return ret;
 }
@@ -1122,46 +1121,9 @@ virProcessRunInMountNamespace(pid_t pid,
   virProcessNamespaceCallback cb,
   void *opaque)
 {
-int ret = -1;
-pid_t child = -1;
-int errfd[2] = { -1, -1 };
+virProcessNamespaceHelperData data = {.pid = pid, .cb = cb, .opaque = 
opaque};
 
-if (pipe2(errfd, O_CLOEXEC) < 0) {
-virReportSystemError(errno, "%s",
- _("Cannot create pipe for child"));
-return -1;
-}
-
-if ((child = virFork()) < 0)
-goto cleanup;
-
-if (child == 0) {
-VIR_FORCE_CLOSE(errfd[0]);
-ret = virProcessNamespaceHelper(errfd[1], pid,
-cb, opaque);
-VIR_FORCE_CLOSE(errfd[1]);
-_exit(ret < 0 ? EXIT_CANCELED : ret);
-} else {
-int status;
-VIR_AUTOFREE(char *) buf = NULL;
-
-VIR_FORCE_CLOSE(errfd[1]);
-ignore_value(virFileReadHeaderFD(errfd[0], 1024, &buf));
-ret = virProcessWait(child, &status, false);
-if (!ret) {
-ret = status == EXIT_CANCELED ? -1 : status;
-if (ret) {
-virReportError(VIR_ERR_INTERNAL_ERROR,
-   _("child reported: %s"),
-   NULLSTR(buf));
-}
-}
-}
-
- cleanup:
-VIR_FORCE_CLOSE(errfd[0]);
-VIR_FORCE_CLOSE(errfd[1]);
-return ret;
+return virProcessRunInFork(virProcessNamespaceHelper, &data);
 }
 
 
-- 
2.18.1

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


[libvirt] [PATCH v3 5/6] Documentation/ABI/testing/sysfs-bus-vfio-mdev: Update for vfio/mdev aggregation support

2018-10-17 Thread Zhenyu Wang
Update vfio/mdev ABI description for new aggregation attributes.

Cc: Kirti Wankhede 
Cc: Alex Williamson 
Cc: Kevin Tian 
Cc: Cornelia Huck 
Signed-off-by: Zhenyu Wang 
---
 Documentation/ABI/testing/sysfs-bus-vfio-mdev | 25 +++
 1 file changed, 25 insertions(+)

diff --git a/Documentation/ABI/testing/sysfs-bus-vfio-mdev 
b/Documentation/ABI/testing/sysfs-bus-vfio-mdev
index 452dbe39270e..192fe06e60d0 100644
--- a/Documentation/ABI/testing/sysfs-bus-vfio-mdev
+++ b/Documentation/ABI/testing/sysfs-bus-vfio-mdev
@@ -85,6 +85,24 @@ Users:
a particular  that can help in understanding the
features provided by that type of mediated device.
 
+What:   /sys/.../mdev_supported_types//aggregation
+Date:   October 2018
+Contact:Zhenyu Wang 
+Description:
+   Reading this attribute will show number of mdev instances
+   that can be aggregated to assign for one mdev device.
+   This is optional attribute. If this attribute exists that
+   means driver supports to aggregate target mdev type's
+   resources assigned for one mdev device.
+Users:
+   Userspace applications interested in creating mediated
+   device with aggregated type instances. Userspace application
+   should check the number of aggregation instances that could
+   be created before creating mediated device by applying this,
+   e.g
+   # echo "83b8f4f2-509f-382f-3c1e-e6bfe0fa1001,aggregate=XX" > \
+  /sys/devices/foo/mdev_supported_types/foo-1/create
+
 What:   /sys/...///
 Date:   October 2016
 Contact:Kirti Wankhede 
@@ -109,3 +127,10 @@ Description:
is active and the vendor driver doesn't support hot unplug.
Example:
# echo 1 > /sys/bus/mdev/devices//remove
+
+What:   /sys/...///aggregated_instances
+Date:   October 2018
+Contact:Zhenyu Wang 
+Description:
+   This attributes shows number of aggregated instances if this
+   mediated device was created with "aggregate" parameter.
\ No newline at end of file
-- 
2.19.1

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


[libvirt] [PATCH v3 4/6] Documentation/vfio-mediated-device.txt: Update for vfio/mdev aggregation support

2018-10-17 Thread Zhenyu Wang
Update vfio/mdev doc on new "aggregate" create parameter, new "aggregation"
attribute and "aggregated_instances" attribute for mdev device.

Cc: Kirti Wankhede 
Cc: Alex Williamson 
Cc: Kevin Tian 
Cc: Cornelia Huck 
Signed-off-by: Zhenyu Wang 
---
 Documentation/vfio-mediated-device.txt | 44 ++
 1 file changed, 38 insertions(+), 6 deletions(-)

diff --git a/Documentation/vfio-mediated-device.txt 
b/Documentation/vfio-mediated-device.txt
index c3f69bcaf96e..cf4849a34c9f 100644
--- a/Documentation/vfio-mediated-device.txt
+++ b/Documentation/vfio-mediated-device.txt
@@ -211,12 +211,20 @@ Directories and files under the sysfs for Each Physical 
Device
   | |   |--- description
   | |   |--- [devices]
   | |--- []
-  |  |--- create
-  |  |--- name
-  |  |--- available_instances
-  |  |--- device_api
-  |  |--- description
-  |  |--- [devices]
+  | ||--- create
+  | ||--- name
+  | ||--- available_instances
+  | ||--- device_api
+  | ||--- description
+  | ||--- [devices]
+  | |--- []
+  | ||--- create
+  | ||--- name
+  | ||--- available_instances
+  | ||--- device_api
+  | ||--- description
+  | ||--- 
+  | ||--- [devices]
 
 * [mdev_supported_types]
 
@@ -260,6 +268,23 @@ Directories and files under the sysfs for Each Physical 
Device
   This attribute should show brief features/description of the type. This is
   optional attribute.
 
+* 
+
+   is an optional attributes to show max number that the
+  instance resources of [] can be aggregated to be assigned
+  for one mdev device. No  attribute means driver doesn't
+  support to aggregate instance resoures for one mdev device.
+   may be less than available_instances which depends on
+  driver.  number can't exceed available_instances.
+
+  Set number of instances by appending "aggregate=N" parameter for
+  create attribute. By default without "aggregate=N" parameter it
+  will create one instance as normal.
+
+Example::
+
+   # echo ",aggregate=N" > create
+
 Directories and Files Under the sysfs for Each mdev Device
 --
 
@@ -268,6 +293,7 @@ Directories and Files Under the sysfs for Each mdev Device
   |- [parent phy device]
   |--- [$MDEV_UUID]
  |--- remove
+|--- 
  |--- mdev_type {link to its type}
  |--- vendor-specific-attributes [optional]
 
@@ -281,6 +307,12 @@ Example::
 
# echo 1 > /sys/bus/mdev/devices/$mdev_UUID/remove
 
+*  (read only)
+
+For mdev created with aggregate parameter, this shows number of
+instances assigned for this mdev. For normal type this attribute will
+not exist.
+
 Mediated device Hot plug
 
 
-- 
2.19.1

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


[libvirt] [PATCH v3 1/6] vfio/mdev: Add new "aggregate" parameter for mdev create

2018-10-17 Thread Zhenyu Wang
For special mdev type which can aggregate instances for mdev device,
this extends mdev create interface by allowing extra "aggregate=xxx"
parameter, which is passed to mdev device model to be able to create
bundled number of instances for target mdev device.

v2: create new create_with_instances operator for vendor driver
v3:
- Change parameter name as "aggregate="
- Fix new interface comments.
- Parameter checking for new option, pass UUID string only to
  parse and properly end parameter for kstrtouint() conversion.

Cc: Kirti Wankhede 
Cc: Alex Williamson 
Cc: Kevin Tian 
Cc: Cornelia Huck 
Signed-off-by: Zhenyu Wang 
---
 drivers/vfio/mdev/mdev_core.c| 21 +
 drivers/vfio/mdev/mdev_private.h |  4 +++-
 drivers/vfio/mdev/mdev_sysfs.c   | 32 
 include/linux/mdev.h | 11 +++
 4 files changed, 59 insertions(+), 9 deletions(-)

diff --git a/drivers/vfio/mdev/mdev_core.c b/drivers/vfio/mdev/mdev_core.c
index 0212f0ee8aea..545c52ec7618 100644
--- a/drivers/vfio/mdev/mdev_core.c
+++ b/drivers/vfio/mdev/mdev_core.c
@@ -104,12 +104,17 @@ static inline void mdev_put_parent(struct mdev_parent 
*parent)
 }
 
 static int mdev_device_create_ops(struct kobject *kobj,
- struct mdev_device *mdev)
+ struct mdev_device *mdev,
+ unsigned int instances)
 {
struct mdev_parent *parent = mdev->parent;
int ret;
 
-   ret = parent->ops->create(kobj, mdev);
+   if (instances > 1) {
+   ret = parent->ops->create_with_instances(kobj, mdev,
+instances);
+   } else
+   ret = parent->ops->create(kobj, mdev);
if (ret)
return ret;
 
@@ -276,7 +281,8 @@ static void mdev_device_release(struct device *dev)
kfree(mdev);
 }
 
-int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid)
+int mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid,
+  unsigned int instances)
 {
int ret;
struct mdev_device *mdev, *tmp;
@@ -287,6 +293,12 @@ int mdev_device_create(struct kobject *kobj, struct device 
*dev, uuid_le uuid)
if (!parent)
return -EINVAL;
 
+   if (instances > 1 &&
+   !parent->ops->create_with_instances) {
+   ret = -EINVAL;
+   goto mdev_fail;
+   }
+
mutex_lock(&mdev_list_lock);
 
/* Check for duplicate */
@@ -316,6 +328,7 @@ int mdev_device_create(struct kobject *kobj, struct device 
*dev, uuid_le uuid)
mdev->dev.bus = &mdev_bus_type;
mdev->dev.release = mdev_device_release;
dev_set_name(&mdev->dev, "%pUl", uuid.b);
+   mdev->type_instances = instances;
 
ret = device_register(&mdev->dev);
if (ret) {
@@ -323,7 +336,7 @@ int mdev_device_create(struct kobject *kobj, struct device 
*dev, uuid_le uuid)
goto mdev_fail;
}
 
-   ret = mdev_device_create_ops(kobj, mdev);
+   ret = mdev_device_create_ops(kobj, mdev, instances);
if (ret)
goto create_fail;
 
diff --git a/drivers/vfio/mdev/mdev_private.h b/drivers/vfio/mdev/mdev_private.h
index b5819b7d7ef7..e90d295d3927 100644
--- a/drivers/vfio/mdev/mdev_private.h
+++ b/drivers/vfio/mdev/mdev_private.h
@@ -33,6 +33,7 @@ struct mdev_device {
struct kref ref;
struct list_head next;
struct kobject *type_kobj;
+   unsigned int type_instances;
bool active;
 };
 
@@ -58,7 +59,8 @@ void parent_remove_sysfs_files(struct mdev_parent *parent);
 int  mdev_create_sysfs_files(struct device *dev, struct mdev_type *type);
 void mdev_remove_sysfs_files(struct device *dev, struct mdev_type *type);
 
-int  mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le 
uuid);
+int  mdev_device_create(struct kobject *kobj, struct device *dev, uuid_le uuid,
+   unsigned int instances);
 int  mdev_device_remove(struct device *dev, bool force_remove);
 
 #endif /* MDEV_PRIVATE_H */
diff --git a/drivers/vfio/mdev/mdev_sysfs.c b/drivers/vfio/mdev/mdev_sysfs.c
index 249472f05509..aefed0c8891b 100644
--- a/drivers/vfio/mdev/mdev_sysfs.c
+++ b/drivers/vfio/mdev/mdev_sysfs.c
@@ -54,14 +54,21 @@ static const struct sysfs_ops mdev_type_sysfs_ops = {
 static ssize_t create_store(struct kobject *kobj, struct device *dev,
const char *buf, size_t count)
 {
-   char *str;
+   char *str, *param, *opt = NULL;
uuid_le uuid;
int ret;
+   unsigned int instances = 1;
 
-   if ((count < UUID_STRING_LEN) || (count > UUID_STRING_LEN + 1))
+   if (count < UUID_STRING_LEN)
return -EINVAL;
 
-   str = kstrndup(buf, count, GFP_KERNEL);
+   if ((param = strnchr(buf, count, ',')) == NULL) {
+   if (count > UUID_STRING_LEN + 1)

[libvirt] [PATCH v3 6/6] drm/i915/gvt: Add new type with aggregation support

2018-10-17 Thread Zhenyu Wang
New aggregation type is created for KVMGT which can be used
to combine minimal resource number for target instances, to create
user defined number of resources. For KVMGT, aggregated resource
is determined by memory and fence resource allocation for target
number of instances.

v2:
- apply for new hooks

Cc: Kirti Wankhede 
Cc: Alex Williamson 
Cc: Kevin Tian 
Cc: Cornelia Huck 
Signed-off-by: Zhenyu Wang 
---
 drivers/gpu/drm/i915/gvt/gvt.h   | 11 +--
 drivers/gpu/drm/i915/gvt/kvmgt.c | 53 --
 drivers/gpu/drm/i915/gvt/vgpu.c  | 56 ++--
 3 files changed, 112 insertions(+), 8 deletions(-)

diff --git a/drivers/gpu/drm/i915/gvt/gvt.h b/drivers/gpu/drm/i915/gvt/gvt.h
index 31f6cdbe5c42..cb318079fa74 100644
--- a/drivers/gpu/drm/i915/gvt/gvt.h
+++ b/drivers/gpu/drm/i915/gvt/gvt.h
@@ -241,6 +241,9 @@ struct intel_vgpu {
 struct intel_gvt_gm {
unsigned long vgpu_allocated_low_gm_size;
unsigned long vgpu_allocated_high_gm_size;
+   unsigned long low_avail;
+   unsigned long high_avail;
+   unsigned long fence_avail;
 };
 
 struct intel_gvt_fence {
@@ -292,13 +295,15 @@ struct intel_gvt_firmware {
 
 #define NR_MAX_INTEL_VGPU_TYPES 20
 struct intel_vgpu_type {
-   char name[16];
+   const char *drv_name;
+   char name[32];
unsigned int avail_instance;
unsigned int low_gm_size;
unsigned int high_gm_size;
unsigned int fence;
unsigned int weight;
enum intel_vgpu_edid resolution;
+   unsigned int aggregation;
 };
 
 struct intel_gvt {
@@ -484,7 +489,7 @@ void intel_gvt_clean_vgpu_types(struct intel_gvt *gvt);
 struct intel_vgpu *intel_gvt_create_idle_vgpu(struct intel_gvt *gvt);
 void intel_gvt_destroy_idle_vgpu(struct intel_vgpu *vgpu);
 struct intel_vgpu *intel_gvt_create_vgpu(struct intel_gvt *gvt,
-struct intel_vgpu_type *type);
+struct intel_vgpu_type *type, unsigned 
int);
 void intel_gvt_destroy_vgpu(struct intel_vgpu *vgpu);
 void intel_gvt_release_vgpu(struct intel_vgpu *vgpu);
 void intel_gvt_reset_vgpu_locked(struct intel_vgpu *vgpu, bool dmlr,
@@ -563,7 +568,7 @@ struct intel_gvt_ops {
int (*emulate_mmio_write)(struct intel_vgpu *, u64, void *,
unsigned int);
struct intel_vgpu *(*vgpu_create)(struct intel_gvt *,
-   struct intel_vgpu_type *);
+ struct intel_vgpu_type *, unsigned 
int);
void (*vgpu_destroy)(struct intel_vgpu *vgpu);
void (*vgpu_release)(struct intel_vgpu *vgpu);
void (*vgpu_reset)(struct intel_vgpu *);
diff --git a/drivers/gpu/drm/i915/gvt/kvmgt.c b/drivers/gpu/drm/i915/gvt/kvmgt.c
index c1072143da1d..841ad5437c4a 100644
--- a/drivers/gpu/drm/i915/gvt/kvmgt.c
+++ b/drivers/gpu/drm/i915/gvt/kvmgt.c
@@ -501,7 +501,9 @@ static void kvmgt_put_vfio_device(void *vgpu)
vfio_device_put(((struct intel_vgpu *)vgpu)->vdev.vfio_device);
 }
 
-static int intel_vgpu_create(struct kobject *kobj, struct mdev_device *mdev)
+static int intel_vgpu_create_internal(struct kobject *kobj,
+ struct mdev_device *mdev,
+ unsigned int instances)
 {
struct intel_vgpu *vgpu = NULL;
struct intel_vgpu_type *type;
@@ -520,7 +522,14 @@ static int intel_vgpu_create(struct kobject *kobj, struct 
mdev_device *mdev)
goto out;
}
 
-   vgpu = intel_gvt_ops->vgpu_create(gvt, type);
+   if (instances > type->aggregation) {
+   gvt_vgpu_err("wrong aggregation specified for type %s\n",
+   kobject_name(kobj));
+   ret = -EINVAL;
+   goto out;
+   }
+
+   vgpu = intel_gvt_ops->vgpu_create(gvt, type, instances);
if (IS_ERR_OR_NULL(vgpu)) {
ret = vgpu == NULL ? -EFAULT : PTR_ERR(vgpu);
gvt_err("failed to create intel vgpu: %d\n", ret);
@@ -540,6 +549,44 @@ static int intel_vgpu_create(struct kobject *kobj, struct 
mdev_device *mdev)
return ret;
 }
 
+static int intel_vgpu_create(struct kobject *kobj, struct mdev_device *mdev)
+{
+   return intel_vgpu_create_internal(kobj, mdev, 1);
+}
+
+static int intel_vgpu_create_with_instances(struct kobject *kobj,
+   struct mdev_device *mdev,
+   unsigned int instances)
+{
+   return intel_vgpu_create_internal(kobj, mdev, instances);
+}
+
+static int intel_vgpu_max_aggregated_instances(struct kobject *kobj,
+  struct device *dev,
+  unsigned int *max)
+{
+   struct intel_vgpu_type *type;
+   struct intel_gvt *gvt;
+   int ret = 0;
+
+   gvt = kdev_to_i915(dev)->gvt;
+
+   type = i

[libvirt] [PATCH v3 3/6] vfio/mdev: Add "aggregated_instances" attribute for supported mdev device

2018-10-17 Thread Zhenyu Wang
For mdev device created by "aggregate" parameter, this creates new mdev
device attribute "aggregated_instances" to show number of aggregated
instances allocated.

v2:
- change attribute name as "aggregated_instances"

v3:
- create only for aggregated allocation

Cc: Kirti Wankhede 
Cc: Alex Williamson 
Cc: Kevin Tian 
Cc: Cornelia Huck 
Signed-off-by: Zhenyu Wang 
---
 drivers/vfio/mdev/mdev_sysfs.c | 20 
 1 file changed, 20 insertions(+)

diff --git a/drivers/vfio/mdev/mdev_sysfs.c b/drivers/vfio/mdev/mdev_sysfs.c
index a329d6ab6cb9..f03bdfbf5a42 100644
--- a/drivers/vfio/mdev/mdev_sysfs.c
+++ b/drivers/vfio/mdev/mdev_sysfs.c
@@ -292,7 +292,17 @@ static ssize_t remove_store(struct device *dev, struct 
device_attribute *attr,
return count;
 }
 
+static ssize_t
+aggregated_instances_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+   struct mdev_device *mdev = to_mdev_device(dev);
+   return sprintf(buf, "%u\n", mdev->type_instances);
+}
+
 static DEVICE_ATTR_WO(remove);
+static DEVICE_ATTR_RO(aggregated_instances);
 
 static const struct attribute *mdev_device_attrs[] = {
&dev_attr_remove.attr,
@@ -301,6 +311,7 @@ static const struct attribute *mdev_device_attrs[] = {
 
 int  mdev_create_sysfs_files(struct device *dev, struct mdev_type *type)
 {
+   struct mdev_device *mdev = to_mdev_device(dev);
int ret;
 
ret = sysfs_create_link(type->devices_kobj, &dev->kobj, dev_name(dev));
@@ -315,8 +326,17 @@ int  mdev_create_sysfs_files(struct device *dev, struct 
mdev_type *type)
if (ret)
goto create_files_failed;
 
+   if (mdev->type_instances > 1) {
+   ret = sysfs_create_file(&dev->kobj,
+   &dev_attr_aggregated_instances.attr);
+   if (ret)
+   goto create_aggregated_failed;
+   }
+
return ret;
 
+create_aggregated_failed:
+   sysfs_remove_files(&dev->kobj, mdev_device_attrs);
 create_files_failed:
sysfs_remove_link(&dev->kobj, "mdev_type");
 type_link_failed:
-- 
2.19.1

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


[libvirt] [PATCH v3 0/6] VFIO mdev aggregated resources handling

2018-10-17 Thread Zhenyu Wang
Current mdev device create interface depends on fixed mdev type, which get uuid
from user to create instance of mdev device. If user wants to use customized
number of resource for mdev device, then only can create new mdev type for that
which may not be flexible. This requirement comes not only from to be able to
allocate flexible resources for KVMGT, but also from Intel scalable IO
virtualization which would use vfio/mdev to be able to allocate arbitrary
resources on mdev instance. More info on [1] [2] [3].

To allow to create user defined resources for mdev, it trys to extend mdev
create interface by adding new "aggregate=xxx" parameter following UUID, for
target mdev type if aggregation is supported, it can create new mdev device
which contains resources combined by number of instances, e.g

echo ",aggregate=10" > create

VM manager e.g libvirt can check mdev type with "aggregation" attribute which
can support this setting. If no "aggregation" attribute found for mdev type,
previous behavior is still kept for one instance allocation. And new sysfs
attribute "aggregated_instances" is created for each mdev device to show 
allocated number.

References:
[1] 
https://software.intel.com/en-us/download/intel-virtualization-technology-for-directed-io-architecture-specification
[2] 
https://software.intel.com/en-us/download/intel-scalable-io-virtualization-technical-specification
  
[3] https://schd.ws/hosted_files/lc32018/00/LC3-SIOV-final.pdf

v2:
  - Add new create_with_instances driver hook
  - Update doc for new attributes

v3:
  - Rename parameter and attribute names from review
  - Make "aggregation" attribute to show maxium resource number
for aggregation
  - Check driver hooks for attribute create validation
  - Update doc and ABI spec

Zhenyu Wang (6):
  vfio/mdev: Add new "aggregate" parameter for mdev create
  vfio/mdev: Add "aggregation" attribute for supported mdev type
  vfio/mdev: Add "aggregated_instances" attribute for supported mdev
device
  Documentation/vfio-mediated-device.txt: Update for vfio/mdev
aggregation support
  Documentation/ABI/testing/sysfs-bus-vfio-mdev: Update for vfio/mdev
aggregation support
  drm/i915/gvt: Add new type with aggregation support

 Documentation/ABI/testing/sysfs-bus-vfio-mdev | 25 +++
 Documentation/vfio-mediated-device.txt| 44 +--
 drivers/gpu/drm/i915/gvt/gvt.h| 11 ++-
 drivers/gpu/drm/i915/gvt/kvmgt.c  | 53 -
 drivers/gpu/drm/i915/gvt/vgpu.c   | 56 +-
 drivers/vfio/mdev/mdev_core.c | 40 +-
 drivers/vfio/mdev/mdev_private.h  |  6 +-
 drivers/vfio/mdev/mdev_sysfs.c| 74 ++-
 include/linux/mdev.h  | 19 +
 9 files changed, 305 insertions(+), 23 deletions(-)

-- 
2.19.1

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH v2 1/1] Add attribute single_usage_restriction for mdev type-id

2018-10-17 Thread Christoph Hellwig
I don't see any patch actually adding (nevermind using) the attibute,
so something seems wrong here.

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH 3/3] qemu: don't duplicate suspended events and state changes

2018-10-17 Thread Nikolay Shirokovskiy



On 16.10.2018 21:46, John Ferlan wrote:
> 
> $SUBJ:
> 
> s/don't/Don't
> 
> On 10/10/18 4:04 AM, Nikolay Shirokovskiy wrote:
>> Now when STOP event handler has correct both suspended event reason
>> and paused state reason let's wipe out duplicated event sending and
>> state changed in/after qemuProcessStopCPUs.
> 
> Since the STOP event handler can use the pausedReason as sent to
> qemuProcessStopCPUs, we no longer need to send duplicate suspended
> lifecycle events because we know what caused the stop along with extra
> details. This processing allows us to also remove the duplicated state
> change from qemuProcessStopCPUs.
> 
> 
>>
>> Signed-off-by: Nikolay Shirokovskiy 
>> ---
>>  src/qemu/qemu_driver.c| 26 +++---
>>  src/qemu/qemu_migration.c | 42 ++
>>  src/qemu/qemu_migration.h |  4 
>>  src/qemu/qemu_process.c   | 13 +++--
>>  4 files changed, 16 insertions(+), 69 deletions(-)
>>
>> diff --git a/src/qemu/qemu_driver.c b/src/qemu/qemu_driver.c
>> index a52e249..7e08768 100644
>> --- a/src/qemu/qemu_driver.c
>> +++ b/src/qemu/qemu_driver.c
>> @@ -1792,10 +1792,8 @@ static int qemuDomainSuspend(virDomainPtr dom)
>>  virQEMUDriverPtr driver = dom->conn->privateData;
>>  virDomainObjPtr vm;
>>  int ret = -1;
>> -virObjectEventPtr event = NULL;
>>  qemuDomainObjPrivatePtr priv;
>>  virDomainPausedReason reason;
>> -int eventDetail;
>>  int state;
>>  virQEMUDriverConfigPtr cfg = NULL;
>>  
>> @@ -1814,16 +1812,12 @@ static int qemuDomainSuspend(virDomainPtr dom)
>>  if (virDomainObjCheckActive(vm) < 0)
>>  goto endjob;
>>  
>> -if (priv->job.asyncJob == QEMU_ASYNC_JOB_MIGRATION_OUT) {
>> +if (priv->job.asyncJob == QEMU_ASYNC_JOB_MIGRATION_OUT)
>>  reason = VIR_DOMAIN_PAUSED_MIGRATION;
>> -eventDetail = VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED;
>> -} else if (priv->job.asyncJob == QEMU_ASYNC_JOB_SNAPSHOT) {
>> +else if (priv->job.asyncJob == QEMU_ASYNC_JOB_SNAPSHOT)
>>  reason = VIR_DOMAIN_PAUSED_SNAPSHOT;
>> -eventDetail = -1; /* don't create lifecycle events when doing 
>> snapshot */
> 
> So with these changes how do we handle this special case? See commit
> f569b87f5 when this was introduced.
> 
> Do we need to adjust qemuProcessHandleStop to not generate the event
> when "reason == VIR_DOMAIN_PAUSED_SNAPSHOT && priv->job.asyncJob ==
> QEMU_ASYNC_JOB_SNAPSHOT"?

Well we had lifecycle event anyway because of stop handler so I decided
to keep sending it. However I'm not sure why it was not sent in 
qemuDomainSuspend
originally. For example for migration we do send event. Looks like this
event does not hurt anyone if it survives up to now.

I'm ok with commit message to if the case.

Nikolay

> 
> The rest is OK - just need your thoughts on this particular case for the
> R-By.
> 
> John
> 
>> -} else {
>> +else
>>  reason = VIR_DOMAIN_PAUSED_USER;
>> -eventDetail = VIR_DOMAIN_EVENT_SUSPENDED_PAUSED;
>> -}
>>  
> 
> [...]
> 

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH 2/3] qemu: map suspended state reason to suspended event detail

2018-10-17 Thread Nikolay Shirokovskiy



On 16.10.2018 21:42, John Ferlan wrote:
> 
> $SUBJ:
> 
> s/map/Map
> 
> On 10/10/18 4:04 AM, Nikolay Shirokovskiy wrote:
>> Map is based on existing cases in code where we send suspended
>> event after changing domain state to paused.
>>
>> Signed-off-by: Nikolay Shirokovskiy 
>> ---
>>  src/qemu/qemu_domain.c  | 34 ++
>>  src/qemu/qemu_domain.h  |  3 +++
>>  src/qemu/qemu_process.c |  9 ++---
>>  3 files changed, 43 insertions(+), 3 deletions(-)
>>
>> diff --git a/src/qemu/qemu_domain.c b/src/qemu/qemu_domain.c
>> index f00f1b3..557743b 100644
>> --- a/src/qemu/qemu_domain.c
>> +++ b/src/qemu/qemu_domain.c
>> @@ -13558,3 +13558,37 @@ 
>> qemuDomainRunningReasonToResumeEvent(virDomainRunningReason reason)
>>  
>>  return VIR_DOMAIN_EVENT_RESUMED_UNPAUSED;
>>  }
>> +
>> +
>> +virDomainEventSuspendedDetailType
>> +qemuDomainPausedReasonToSuspendedEvent(virDomainPausedReason reason)
>> +{
>> +switch (reason) {
>> +case VIR_DOMAIN_PAUSED_MIGRATION:
>> +return VIR_DOMAIN_EVENT_SUSPENDED_MIGRATED;
>> +
>> +case VIR_DOMAIN_PAUSED_FROM_SNAPSHOT:
>> +return VIR_DOMAIN_EVENT_SUSPENDED_FROM_SNAPSHOT;
>> +
>> +case VIR_DOMAIN_PAUSED_POSTCOPY_FAILED:
>> +return VIR_DOMAIN_EVENT_SUSPENDED_POSTCOPY_FAILED;
>> +
>> +case VIR_DOMAIN_PAUSED_POSTCOPY:
>> +return VIR_DOMAIN_EVENT_SUSPENDED_POSTCOPY;
>> +
>> +case VIR_DOMAIN_PAUSED_UNKNOWN:
>> +case VIR_DOMAIN_PAUSED_USER:
>> +case VIR_DOMAIN_PAUSED_SAVE:
>> +case VIR_DOMAIN_PAUSED_DUMP:
>> +case VIR_DOMAIN_PAUSED_IOERROR:
>> +case VIR_DOMAIN_PAUSED_WATCHDOG:
>> +case VIR_DOMAIN_PAUSED_SHUTTING_DOWN:
>> +case VIR_DOMAIN_PAUSED_SNAPSHOT:
> 
> This one may cause issues in the next patch...
> 
>> +case VIR_DOMAIN_PAUSED_CRASHED:
>> +case VIR_DOMAIN_PAUSED_STARTING_UP:
>> +case VIR_DOMAIN_PAUSED_LAST:
>> +break;
>> +}
>> +
>> +return VIR_DOMAIN_EVENT_SUSPENDED_PAUSED;
>> +}
>> diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
>> index 380ea14..ee88266 100644
>> --- a/src/qemu/qemu_domain.h
>> +++ b/src/qemu/qemu_domain.h
>> @@ -1093,4 +1093,7 @@ void qemuDomainStorageIdReset(qemuDomainObjPrivatePtr 
>> priv);
>>  virDomainEventResumedDetailType
>>  qemuDomainRunningReasonToResumeEvent(virDomainRunningReason reason);
>>  
>> +virDomainEventSuspendedDetailType
>> +qemuDomainPausedReasonToSuspendedEvent(virDomainPausedReason reason);
>> +
>>  #endif /* __QEMU_DOMAIN_H__ */
>> diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
>> index 27021b9..6858377 100644
>> --- a/src/qemu/qemu_process.c
>> +++ b/src/qemu/qemu_process.c
>> @@ -642,7 +642,7 @@ qemuProcessHandleStop(qemuMonitorPtr mon 
>> ATTRIBUTE_UNUSED,
>>  virQEMUDriverPtr driver = opaque;
>>  virObjectEventPtr event = NULL;
>>  virDomainPausedReason reason;
>> -virDomainEventSuspendedDetailType detail = 
>> VIR_DOMAIN_EVENT_SUSPENDED_PAUSED;
>> +virDomainEventSuspendedDetailType detail;
>>  virQEMUDriverConfigPtr cfg = virQEMUDriverGetConfig(driver);
>>  qemuDomainObjPrivatePtr priv = vm->privateData;
>>  
>> @@ -668,8 +668,11 @@ qemuProcessHandleStop(qemuMonitorPtr mon 
>> ATTRIBUTE_UNUSED,
>>  }
>>  }
>>  
>> -VIR_DEBUG("Transitioned guest %s to paused state, reason %s",
>> -  vm->def->name, virDomainPausedReasonTypeToString(reason));
>> +detail = qemuDomainPausedReasonToSuspendedEvent(reason);
> 
> This setting of @detail overrides the @detail set just above...

But overrides to the same value.

> 
> It seems we can remove the @detail setting above in this patch as well
> as the need for the { } and { } around the setting of @reason in this
> patch instead of the subsequent one. Also, may as well place the check
> on the same line since it fits, e.g.:
> 
> if (priv->job.current->status == QEMU_DOMAIN_JOB_STATUS_POSTCOPY)
> reason = VIR_DOMAIN_PAUSED_POSTCOPY;
> else
> reason = VIR_DOMAIN_PAUSED_MIGRATION;
> 
> With that adjustment,
> 
> Reviewed-by: John Ferlan 
> 
> John
> 
> Similar to patch 1, I can make the changes for you...

I'm ok here too, but we can left removing setting details for 
QEMU_DOMAIN_JOB_STATUS_POSTCOPY
as well as this patch correct in this aspect.

Nikolay

> 
>> +VIR_DEBUG("Transitioned guest %s to paused state, "
>> +  "reason %s, event detail %d",
>> +  vm->def->name, virDomainPausedReasonTypeToString(reason),
>> +  detail);
>>  
>>  if (priv->job.current)
>>  ignore_value(virTimeMillisNow(&priv->job.current->stopped));
>>

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH] conf: Fix typos in pcie controllers' name

2018-10-17 Thread Andrea Bolognani
On Wed, 2018-10-17 at 09:10 +0800, Han Han wrote:
> Signed-off-by: Han Han 
> ---
>  src/conf/domain_addr.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
> 
> diff --git a/src/conf/domain_addr.c b/src/conf/domain_addr.c
> index 442e6aab94..e4ed143b76 100644
> --- a/src/conf/domain_addr.c
> +++ b/src/conf/domain_addr.c
> @@ -158,9 +158,9 @@ virDomainPCIAddressFlagsCompatible(virPCIDeviceAddressPtr 
> addr,
>  } else if (devFlags & VIR_PCI_CONNECT_TYPE_PCIE_ROOT_PORT) {
>  connectStr = "pcie-root-port";
>  } else if (devFlags & 
> VIR_PCI_CONNECT_TYPE_PCIE_SWITCH_UPSTREAM_PORT) {
> -connectStr = "pci-switch-upstream-port";
> +connectStr = "pcie-switch-upstream-port";
>  } else if (devFlags & 
> VIR_PCI_CONNECT_TYPE_PCIE_SWITCH_DOWNSTREAM_PORT) {
> -connectStr = "pci-switch-downstream-port";
> +connectStr = "pcie-switch-downstream-port";
>  } else if (devFlags & VIR_PCI_CONNECT_TYPE_DMI_TO_PCI_BRIDGE) {
>  connectStr = "dmi-to-pci-bridge";
>  } else if (devFlags & VIR_PCI_CONNECT_TYPE_PCIE_TO_PCI_BRIDGE) {

Good catch!

Reviewed-by: Andrea Bolognani 

and pushed :)

-- 
Andrea Bolognani / Red Hat / Virtualization

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH] conf: More clear error msg for incomplete coalesce xml

2018-10-17 Thread Martin Kletzander

On Tue, Oct 16, 2018 at 07:19:41PM -0400, John Ferlan wrote:



On 10/14/18 10:26 AM, Han Han wrote:

https://bugzilla.redhat.com/show_bug.cgi?id=1535930

Report more clear err msg instead of unknown error when coalesce
settings is incomplete.



Incomplete is not an error.  It's request for removal of the missing setting.
There is no problem if it is incomplete.

If you look at the BZ the problem is not the incomplete XML, but the fact that
the code that should update the device fails somewhere without setting an error.
Actually, there should not be an error, it should work.  So this just works
around the actual issue.

Having said that maybe the problem is somewhere in the parsing part, but this is
not the solution.  We need to "go deeper" to find out why the updating code
fails and then figure out what to fix from there, not put a sheet over the
problem.


Signed-off-by: Han Han 
---
 src/conf/domain_conf.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
index 9911d56130..e755f45d3d 100644
--- a/src/conf/domain_conf.c
+++ b/src/conf/domain_conf.c
@@ -7804,8 +7804,12 @@ virDomainNetDefCoalesceParseXML(xmlNodePtr node,
 ctxt->node = node;

 str = virXPathString("string(./rx/frames/@max)", ctxt);
-if (!str)
+if (!str) {
+virReportError(VIR_ERR_XML_DETAIL,
+   "%s",


This can be put on the previous line


+   _("incomplete coalesce settings in interface xml"));


and specifically this could be is missing rx frames max attributes

However, according to the RNG from commit 523c9960, it seems the 'rx' is
optional as is the '@max' value.  Maybe Martin should provide a comment
on this series since he added it.

Of course that would cause the whole  to disappear on Format.
It would also cause problems because def->coalesce would have something
that's empty.

So perhaps the best thing to do is pass the @def into here, then only if
we get beyond the initial !str comparison do we allocate and fill it in;
otherwise, we return 0 if rx/frames/@max is not there.  Prepares us for
the future.

I guess I'm not 100% clear if max frames == 0 what would happen. Maybe
Martin knows (I've CC'd him).



`rx-frames=0` turns that option off.  It's like not having the parameter there
at all (it also works like this with ethtool).


John


 goto cleanup;
+}

 if (VIR_ALLOC(ret) < 0)
 goto cleanup;



signature.asc
Description: Digital signature
--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list

Re: [libvirt] [PATCH 1/3] qemu: pass stop reason from qemuProcessStopCPUs to stop handler

2018-10-17 Thread Nikolay Shirokovskiy



On 16.10.2018 21:40, John Ferlan wrote:
> 
> $SUBJ:
> 
> s/pass/Pass
> 
> On 10/10/18 4:04 AM, Nikolay Shirokovskiy wrote:
>> We send duplicate suspended lifecycle events on qemu process stop in several
>> places. The reason is stop event handler always send suspended event and
>> we addidionally send same event but with more presise reason after call
> 
> *additionally
> *precise
> 
>> to qemuProcessStopCPUs. Domain state change is also duplicated.
>>
>> Let's change domain state and send event only in handler. For this
>> purpuse first let's pass state change reason to event handler (event
> 
> *purpose
> 
>> reason is deducible from it).
>>
>> Inspired by similar patch for resume: 5dab984ed
>> "qemu: Pass running reason to RESUME event handler".
>>
> 
> In any case, I think the above was a bit more appropriate for the cover
> since it details a few things being altered in the 3rd patch of the
> series. So, maybe this could change to:
> 
> Similar to commit 5dab984ed which saves and passes the running reason to
> the RESUME event handler, during qemuProcessStopCPUs let's save and pass
> the pause reason in the domain private data so that the STOP event
> handler can use it.
> 
>> Signed-off-by: Nikolay Shirokovskiy 
>> ---
>>  src/qemu/qemu_domain.h  |  4 
>>  src/qemu/qemu_process.c | 15 ---
>>  2 files changed, 16 insertions(+), 3 deletions(-)
>>
>> diff --git a/src/qemu/qemu_domain.h b/src/qemu/qemu_domain.h
>> index 80bd4bd..380ea14 100644
>> --- a/src/qemu/qemu_domain.h
>> +++ b/src/qemu/qemu_domain.h
>> @@ -370,6 +370,10 @@ struct _qemuDomainObjPrivate {
>>  /* qemuProcessStartCPUs stores the reason for starting vCPUs here for 
>> the
>>   * RESUME event handler to use it */
>>  virDomainRunningReason runningReason;
>> +
>> +/* qemuProcessStopCPUs stores the reason for starting vCPUs here for the
> 
> s/starting/pausing/
> 
> too much copy-pasta
> 
> FWIW: Similar to the comment I made to Jirka for his series, I assume
> this STOP processing would have the similar issue with the event going
> to the old libvirtd and thus not needing to save/restore via XML state
> processing. For context see:
> 
> https://www.redhat.com/archives/libvir-list/2018-September/msg01145.html
> 
>> + * STOP event handler to use it */
>> +virDomainPausedReason pausedReason;
>>  };
>>  
> 
> With a couple of adjustments,
> 
> Reviewed-by: John Ferlan 
> 
> John
> 
> I can make the adjustments so that you don't need to send another series
> - just need your acknowledgment for that.
> 

I'm ok with you changes

Nikolay

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


[libvirt] [PATCH] util: Fix a typo in comments of virresctrl.c

2018-10-17 Thread Wang Huaqiang
Signed-off-by: Wang Huaqiang 
---
 src/util/virresctrl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/util/virresctrl.c b/src/util/virresctrl.c
index df5b512..5d811a2 100644
--- a/src/util/virresctrl.c
+++ b/src/util/virresctrl.c
@@ -259,7 +259,7 @@ virResctrlInfoMonFree(virResctrlInfoMonPtr mon)
  * all of them.  While doing that we store the bitmask in a sparse array of
  * virBitmaps named `masks` indexed the same way as `sizes`.  The upper bounds
  * of the sparse arrays are stored in nmasks or nsizes, respectively.
- + *
+ *
  * =Memory Bandwidth allocation technology (MBA)=
  *
  * The memory bandwidth allocation support in virResctrlAlloc works in the
-- 
2.7.4

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH] nwfilter: fix learning address thread shutdown

2018-10-17 Thread Nikolay Shirokovskiy



On 17.10.2018 01:28, John Ferlan wrote:
> 
> 
> On 10/12/18 3:23 AM, Nikolay Shirokovskiy wrote:
>> If learning thread is configured to learn on all ethernet frames (which is
>> hardcoded) then chances are big that there is packet on every iteration of
>> inspecting frames loop. As result we will hang on shutdown because we don't
>> check threadsTerminate if there is packet.
>>
>> Let's just check termination conditions on every iteration.
>>
>> Signed-off-by: Nikolay Shirokovskiy 
>> ---
>>  src/nwfilter/nwfilter_learnipaddr.c | 22 +++---
>>  1 file changed, 7 insertions(+), 15 deletions(-)
>>
>> diff --git a/src/nwfilter/nwfilter_learnipaddr.c 
>> b/src/nwfilter/nwfilter_learnipaddr.c
>> index 008c24b..e6cb996 100644
>> --- a/src/nwfilter/nwfilter_learnipaddr.c
>> +++ b/src/nwfilter/nwfilter_learnipaddr.c
>> @@ -483,6 +483,12 @@ learnIPAddressThread(void *arg)
>>  while (req->status == 0 && vmaddr == 0) {
>>  int n = poll(fds, ARRAY_CARDINALITY(fds), PKT_TIMEOUT_MS);
>>  
>> +if (threadsTerminate || req->terminate) {
>> +req->status = ECANCELED;
>> +showError = false;
>> +break;
>> +}
>> +
> 
> So the theory is that regardless of whether there is a timeout or not,
> let's check for termination markers before checking poll call return
> status.  Which is fine; however, ...
> 
>>  if (n < 0) {
>>  if (errno == EAGAIN || errno == EINTR)
>>  continue;
>> @@ -492,15 +498,8 @@ learnIPAddressThread(void *arg)
>>  break;
>>  }
>>  
>> -if (n == 0) {
>> -if (threadsTerminate || req->terminate) {
>> -VIR_DEBUG("Terminate request seen, cancelling pcap");
>> -req->status = ECANCELED;
>> -showError = false;
>> -break;
>> -}
>> +if (n == 0)
>>  continue;
>> -}
>>  
>>  if (fds[0].revents & (POLLHUP | POLLERR)) {
>>  VIR_DEBUG("Error from FD probably dev deleted");
>> @@ -512,13 +511,6 @@ learnIPAddressThread(void *arg)
>>  packet = pcap_next(handle, &header);
>>  
>>  if (!packet) {
>> -/* Already handled with poll, but lets be sure */
>> -if (threadsTerminate || req->terminate) {
>> -req->status = ECANCELED;
>> -showError = false;
>> -break;
>> -}
>> -
> 
> Why remove this one? Is it possible termination was set after the poll
> and if fetching the packet fails, then if these are true let's get out
> before possibly continuing back to the poll (which I assume would
> timeout and fail).  Secondary question would be should this be separated
> from the other part?

I guess pcap_next does not takes much time (as it does not wait for IO)
so there is a little chance things change after the above check. And
even if they are timeout is small and we terminate with little delay
right after poll.

As to the second question I'm not sure why separation is useful.

Nikolay

> 
> Just need to ask - maybe the answer alters the commit message a little
> bit too.
> 
> John
> 
>>  /* Again, already handled above, but lets be sure */
>>  if (virNetDevValidateConfig(req->binding->portdevname, NULL, 
>> req->ifindex) <= 0) {
>>  virResetLastError();
>>

--
libvir-list mailing list
libvir-list@redhat.com
https://www.redhat.com/mailman/listinfo/libvir-list


Re: [libvirt] [PATCH] libvirt: add daemon itself as shutdown reason

2018-10-17 Thread Nikolay Shirokovskiy



On 16.10.2018 15:48, John Ferlan wrote:
> 
> 
> On 10/8/18 7:21 AM, Nikolay Shirokovskiy wrote:
>> Let's introduce shutdown reason "daemon" which means we have to
>> kill running domain ourselves as the best action we can take at
>> that moment. Failure to pick up domain on daemon restart is
>> one example of such case. Using reason "crashed" is a bit misleading
>> as it is used when qemu is actually crashed or qemu destroyed on
>> guest panic as result of user choice that is the problem was
>> in qemu/guest itself. So I propose to use "crashed" only for
>> qemu side issues and introduce "daemon" when we have to kill the qemu
>> for good.
> 
> How about (although reading below may shed some more context):
> 
> This patch introduces a new shutdown reason "daemon" in order
> to indicate that the daemon needed to force shutdown the domain
> as the best course of action to take at the moment.
> 
> This action would occur during Reconnect processing when it's
> determined that either the QEMU monitor no longer exists for
> some unknown reason or creation of a thread to reconnect the
> domain failed.

Mostly I'm fine with this one except "monitor no longer exists"
is condition for reason "crashed" or "unknown" (Martin's contribution)

> 
>>
>> There is another example where "daemon" will be useful. If we can
>> not reboot domain we kill it and got "crashed" reason now. Looks
>> like good candidate for "daemon" reason.
> 
> If you feel this way, then a followup patch could/should be posted;
> otherwise, this'll be lost to commit message heaven where all good ideas
> go to die ;-).

Sure!

> 
>>
>> Signed-off-by: Nikolay Shirokovskiy 
>> ---
>>  include/libvirt/libvirt-domain.h |  1 +
>>  src/conf/domain_conf.c   |  3 ++-
>>  src/qemu/qemu_process.c  | 11 ---
>>  tools/virsh-domain-monitor.c |  3 ++-
>>  4 files changed, 9 insertions(+), 9 deletions(-)
>>
>> diff --git a/include/libvirt/libvirt-domain.h 
>> b/include/libvirt/libvirt-domain.h
>> index fdd2d6b..11fdab5 100644
>> --- a/include/libvirt/libvirt-domain.h
>> +++ b/include/libvirt/libvirt-domain.h
>> @@ -145,6 +145,7 @@ typedef enum {
>>  VIR_DOMAIN_SHUTOFF_FAILED = 6,  /* domain failed to start */
>>  VIR_DOMAIN_SHUTOFF_FROM_SNAPSHOT = 7, /* restored from a snapshot which 
>> was
>> * taken while domain was shutoff 
>> */
>> +VIR_DOMAIN_SHUTOFF_DAEMON = 8,  /* daemon have to kill domain */
> 
> s/have to/decides to/
> 
>>  # ifdef VIR_ENUM_SENTINELS
>>  VIR_DOMAIN_SHUTOFF_LAST
>>  # endif
>> diff --git a/src/conf/domain_conf.c b/src/conf/domain_conf.c
>> index 9911d56..e441723 100644
>> --- a/src/conf/domain_conf.c
>> +++ b/src/conf/domain_conf.c
>> @@ -779,7 +779,8 @@ VIR_ENUM_IMPL(virDomainShutoffReason, 
>> VIR_DOMAIN_SHUTOFF_LAST,
>>"migrated",
>>"saved",
>>"failed",
>> -  "from snapshot")
>> +  "from snapshot",
>> +  "daemon")
>>  
>>  VIR_ENUM_IMPL(virDomainCrashedReason, VIR_DOMAIN_CRASHED_LAST,
>>"unknown",
>> diff --git a/src/qemu/qemu_process.c b/src/qemu/qemu_process.c
>> index e9c7618..c4bc9ca 100644
>> --- a/src/qemu/qemu_process.c
>> +++ b/src/qemu/qemu_process.c
>> @@ -7988,15 +7988,12 @@ qemuProcessReconnect(void *opaque)
>>  /* We can't get the monitor back, so must kill the VM
>>   * to remove danger of it ending up running twice if
>>   * user tries to start it again later
> 
> Since we're changing anyway, let's put the stop there, e.g.
> "s/later/later./"
> 
>> - * If we couldn't get the monitor since QEMU supports
>> - * no-shutdown, we can safely say that the domain
>> - * crashed ... */
>> -state = VIR_DOMAIN_SHUTOFF_CRASHED;
>> -/* If BeginJob failed, we jumped here without a job, let's hope 
>> another
>> + * If BeginJob failed, we jumped here without a job, let's hope 
>> another
>>   * thread didn't have a chance to start playing with the domain yet
>>   * (it's all we can do anyway).
>>   */
>> -qemuProcessStop(driver, obj, state, QEMU_ASYNC_JOB_NONE, stopFlags);
>> +qemuProcessStop(driver, obj, VIR_DOMAIN_SHUTOFF_DAEMON,
>> +QEMU_ASYNC_JOB_NONE, stopFlags);
>>  }
>>  goto cleanup;
>>  }
>> @@ -8035,7 +8032,7 @@ qemuProcessReconnectHelper(virDomainObjPtr obj,
>>   * is no thread that could be doing anything else with the same 
>> domain
>>   * object.
>>   */
>> -qemuProcessStop(src->driver, obj, VIR_DOMAIN_SHUTOFF_FAILED,
>> +qemuProcessStop(src->driver, obj, VIR_DOMAIN_SHUTOFF_DAEMON,
>>  QEMU_ASYNC_JOB_NONE, 0);
>>  qemuDomainRemoveInactiveJobLocked(src->driver, obj);
>>  
> 
> When qemuProcessReconnectHelper was introduced (commit d38897a5d)
> reconnection failure used VIR_DOMAIN_SHUTOFF_FAILED; however, that was
>