Hello community, here is the log from the commit of package xen for openSUSE:Factory checked in at 2015-09-24 07:16:58 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/xen (Old) and /work/SRC/openSUSE:Factory/.xen.new (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "xen" Changes: -------- --- /work/SRC/openSUSE:Factory/xen/xen.changes 2015-08-31 22:57:44.000000000 +0200 +++ /work/SRC/openSUSE:Factory/.xen.new/xen.changes 2015-09-24 07:16:59.000000000 +0200 @@ -1,0 +2,38 @@ +Mon Sep 16 07:17:00 UTC 2015 - cy...@suse.com + +- bsc#945165 - Xl pci-attach show error with kernel of SLES 12 sp1 + pci-attach-fix.patch + +------------------------------------------------------------------- +Tue Sep 15 14:48:27 MDT 2015 - jfeh...@suse.com + +- bsc#945164 - Xl destroy show error with kernel of SLES 12 sp1 + 5537a4d8-libxl-use-DEBUG-log-level-instead-of-INFO.patch + +------------------------------------------------------------------- +Wed Sep 9 08:55:29 MDT 2015 - carn...@suse.com + +- Upstream patches from Jan + 55dc78e9-x86-amd_ucode-skip-updates-for-final-levels.patch + 55dc7937-x86-IO-APIC-don-t-create-pIRQ-mapping-from-masked-RTE.patch + 55df2f76-IOMMU-skip-domains-without-page-tables-when-dumping.patch + 55e43fd8-x86-NUMA-fix-setup_node.patch + 55e43ff8-x86-NUMA-don-t-account-hotplug-regions.patch + 55e593f1-x86-NUMA-make-init_node_heap-respect-Xen-heap-limit.patch + 54c2553c-grant-table-use-uint16_t-consistently-for-offset-and-length.patch + 54ca33bc-grant-table-refactor-grant-copy-to-reduce-duplicate-code.patch + 54ca340e-grant-table-defer-releasing-pages-acquired-in-a-grant-copy.patch + +------------------------------------------------------------------- +Tue Sep 8 11:26:45 MDT 2015 - carn...@suse.com + +- bsc#944463 - VUL-0: CVE-2015-5239: qemu-kvm: Integer overflow in + vnc_client_read() and protocol_client_msg() + CVE-2015-5239-qemuu-limit-client_cut_text-msg-payload-size.patch + CVE-2015-5239-qemut-limit-client_cut_text-msg-payload-size.patch +- bsc#944697 - VUL-1: CVE-2015-6815: qemu: net: e1000: infinite + loop issue + CVE-2015-6815-qemuu-e1000-fix-infinite-loop.patch + CVE-2015-6815-qemut-e1000-fix-infinite-loop.patch + +------------------------------------------------------------------- New: ---- 
54c2553c-grant-table-use-uint16_t-consistently-for-offset-and-length.patch 54ca33bc-grant-table-refactor-grant-copy-to-reduce-duplicate-code.patch 54ca340e-grant-table-defer-releasing-pages-acquired-in-a-grant-copy.patch 5537a4d8-libxl-use-DEBUG-log-level-instead-of-INFO.patch 55dc78e9-x86-amd_ucode-skip-updates-for-final-levels.patch 55dc7937-x86-IO-APIC-don-t-create-pIRQ-mapping-from-masked-RTE.patch 55df2f76-IOMMU-skip-domains-without-page-tables-when-dumping.patch 55e43fd8-x86-NUMA-fix-setup_node.patch 55e43ff8-x86-NUMA-don-t-account-hotplug-regions.patch 55e593f1-x86-NUMA-make-init_node_heap-respect-Xen-heap-limit.patch CVE-2015-5239-qemut-limit-client_cut_text-msg-payload-size.patch CVE-2015-5239-qemuu-limit-client_cut_text-msg-payload-size.patch CVE-2015-6815-qemut-e1000-fix-infinite-loop.patch CVE-2015-6815-qemuu-e1000-fix-infinite-loop.patch pci-attach-fix.patch ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ xen.spec ++++++ --- /var/tmp/diff_new_pack.mHwCXF/_old 2015-09-24 07:17:03.000000000 +0200 +++ /var/tmp/diff_new_pack.mHwCXF/_new 2015-09-24 07:17:03.000000000 +0200 @@ -159,7 +159,7 @@ %endif %endif -Version: 4.5.1_07 +Version: 4.5.1_08 Release: 0 Summary: Xen Virtualization: Hypervisor (aka VMM aka Microkernel) License: GPL-2.0 @@ -223,6 +223,13 @@ Patch19: 55a77e4f-dmar-device-scope-mem-leak-fix.patch Patch20: 55c1d83d-x86-gdt-Drop-write-only-xalloc-d-array.patch Patch21: 55c3232b-x86-mm-Make-hap-shadow-teardown-preemptible.patch +Patch22: 55dc78e9-x86-amd_ucode-skip-updates-for-final-levels.patch +Patch23: 55dc7937-x86-IO-APIC-don-t-create-pIRQ-mapping-from-masked-RTE.patch +Patch24: 55df2f76-IOMMU-skip-domains-without-page-tables-when-dumping.patch +Patch25: 55e43fd8-x86-NUMA-fix-setup_node.patch +Patch26: 55e43ff8-x86-NUMA-don-t-account-hotplug-regions.patch +Patch27: 55e593f1-x86-NUMA-make-init_node_heap-respect-Xen-heap-limit.patch +Patch28: 
5537a4d8-libxl-use-DEBUG-log-level-instead-of-INFO.patch Patch131: CVE-2015-4106-xsa131-9.patch Patch137: CVE-2015-3259-xsa137.patch Patch139: xsa139-qemuu.patch @@ -256,6 +263,10 @@ Patch262: CVE-2015-5154-qemut-check-array-bounds-before-writing-to-io_buffer.patch Patch263: CVE-2015-5154-qemut-fix-START-STOP-UNIT-command-completion.patch Patch264: CVE-2015-5154-qemut-clear-DRQ-after-handling-all-expected-accesses.patch +Patch265: CVE-2015-6815-qemuu-e1000-fix-infinite-loop.patch +Patch266: CVE-2015-6815-qemut-e1000-fix-infinite-loop.patch +Patch267: CVE-2015-5239-qemuu-limit-client_cut_text-msg-payload-size.patch +Patch268: CVE-2015-5239-qemut-limit-client_cut_text-msg-payload-size.patch # Our platform specific patches Patch301: xen-destdir.patch Patch302: vif-bridge-no-iptables.patch @@ -325,6 +336,7 @@ Patch471: qemu-xen-enable-spice-support.patch Patch472: tigervnc-long-press.patch Patch473: xendomains-libvirtd-conflict.patch +Patch474: pci-attach-fix.patch # Hypervisor and PV driver Patches Patch501: x86-ioapic-ack-default.patch Patch502: x86-cpufreq-report.patch @@ -351,6 +363,10 @@ Patch708: 55b0a283-x86-MSI-X-teardown.patch Patch709: 55b0a2ab-x86-MSI-X-enable.patch Patch710: 55b0a2db-x86-MSI-track-guest-masking.patch +# grant table performance improvements +Patch715: 54c2553c-grant-table-use-uint16_t-consistently-for-offset-and-length.patch +Patch716: 54ca33bc-grant-table-refactor-grant-copy-to-reduce-duplicate-code.patch +Patch717: 54ca340e-grant-table-defer-releasing-pages-acquired-in-a-grant-copy.patch # ticket locks Patch720: 552d0fd2-x86-hvm-don-t-include-asm-spinlock-h.patch Patch721: 552d0fe8-x86-mtrr-include-asm-atomic.h.patch @@ -602,6 +618,13 @@ %patch19 -p1 %patch20 -p1 %patch21 -p1 +%patch22 -p1 +%patch23 -p1 +%patch24 -p1 +%patch25 -p1 +%patch26 -p1 +%patch27 -p1 +%patch28 -p1 %patch131 -p1 %patch137 -p1 %patch139 -p1 @@ -635,6 +658,10 @@ %patch262 -p1 %patch263 -p1 %patch264 -p1 +%patch265 -p1 +%patch266 -p1 +%patch267 -p1 +%patch268 -p1 # Our 
platform specific patches %patch301 -p1 %patch302 -p1 @@ -703,6 +730,7 @@ %patch471 -p1 %patch472 -p1 %patch473 -p1 +%patch474 -p1 # Hypervisor and PV driver Patches %patch501 -p1 %patch502 -p1 @@ -729,6 +757,10 @@ %patch708 -p1 %patch709 -p1 %patch710 -p1 +# grant table performance improvements +%patch715 -p1 +%patch716 -p1 +%patch717 -p1 # ticket locks %patch720 -p1 %patch721 -p1 ++++++ 54c2553c-grant-table-use-uint16_t-consistently-for-offset-and-length.patch ++++++ # Commit b7f74a19fe099e373ad52e4218c466f3e91b5f43 # Date 2015-01-23 15:05:48 +0100 # Author David Vrabel <david.vra...@citrix.com> # Committer Jan Beulich <jbeul...@suse.com> grant-table: use uint16_t consistently for grant copy offset and length Signed-off-by: David Vrabel <david.vra...@citrix.com> Acked-by: Jan Beulich <jbeul...@suse.com> --- a/xen/common/grant_table.c +++ b/xen/common/grant_table.c @@ -1882,7 +1882,7 @@ static int __acquire_grant_for_copy( struct domain *rd, unsigned long gref, domid_t ldom, int readonly, unsigned long *frame, struct page_info **page, - unsigned *page_off, unsigned *length, unsigned allow_transitive) + uint16_t *page_off, uint16_t *length, unsigned allow_transitive) { struct grant_table *rgt = rd->grant_table; grant_entry_v1_t *sha1; @@ -1895,8 +1895,8 @@ __acquire_grant_for_copy( grant_ref_t trans_gref; struct domain *td; unsigned long grant_frame; - unsigned trans_page_off; - unsigned trans_length; + uint16_t trans_page_off; + uint16_t trans_length; int is_sub_page; s16 rc = GNTST_okay; @@ -2122,7 +2122,7 @@ __gnttab_copy( if ( src_is_gref ) { - unsigned source_off, source_len; + uint16_t source_off, source_len; rc = __acquire_grant_for_copy(sd, op->source.u.ref, current->domain->domain_id, 1, &s_frame, &s_pg, @@ -2147,7 +2147,7 @@ __gnttab_copy( if ( dest_is_gref ) { - unsigned dest_off, dest_len; + uint16_t dest_off, dest_len; rc = __acquire_grant_for_copy(dd, op->dest.u.ref, current->domain->domain_id, 0, &d_frame, &d_pg, &dest_off, &dest_len, 1); ++++++ 
54ca33bc-grant-table-refactor-grant-copy-to-reduce-duplicate-code.patch ++++++ # Commit 3c72f8c2cf19f735d813081c836f03e3078ee5c1 # Date 2015-01-29 14:21:00 +0100 # Author David Vrabel <david.vra...@citrix.com> # Committer Jan Beulich <jbeul...@suse.com> grant-table: refactor grant copy to reduce duplicate code Much of the grant copy operation is identical for the source and destination buffers. Refactor the code into per-buffer functions. Signed-off-by: David Vrabel <david.vra...@citrix.com> Reviewed-by: Jan Beulich <jbeul...@suse.com> Reviewed-by: Tim Deegan <t...@xen.org> --- a/xen/common/grant_table.c +++ b/xen/common/grant_table.c @@ -2077,139 +2077,230 @@ __acquire_grant_for_copy( return rc; } -static void -__gnttab_copy( - struct gnttab_copy *op) -{ - struct domain *sd = NULL, *dd = NULL; - unsigned long s_frame, d_frame; - struct page_info *s_pg = NULL, *d_pg = NULL; - char *sp, *dp; - s16 rc = GNTST_okay; - int have_d_grant = 0, have_s_grant = 0; - int src_is_gref, dest_is_gref; - - if ( ((op->source.offset + op->len) > PAGE_SIZE) || - ((op->dest.offset + op->len) > PAGE_SIZE) ) - PIN_FAIL(error_out, GNTST_bad_copy_arg, "copy beyond page area.\n"); +struct gnttab_copy_buf { + /* Guest provided. */ + struct gnttab_copy_ptr ptr; + uint16_t len; + + /* Mapped etc. 
*/ + struct domain *domain; + unsigned long frame; + struct page_info *page; + void *virt; + bool_t read_only; + bool_t have_grant; + bool_t have_type; +}; - src_is_gref = op->flags & GNTCOPY_source_gref; - dest_is_gref = op->flags & GNTCOPY_dest_gref; +static int gnttab_copy_lock_domain(domid_t domid, unsigned int gref_flag, + struct gnttab_copy_buf *buf) +{ + int rc; - if ( (op->source.domid != DOMID_SELF && !src_is_gref ) || - (op->dest.domid != DOMID_SELF && !dest_is_gref) ) - PIN_FAIL(error_out, GNTST_permission_denied, + if ( domid != DOMID_SELF && !gref_flag ) + PIN_FAIL(out, GNTST_permission_denied, "only allow copy-by-mfn for DOMID_SELF.\n"); - if ( op->source.domid == DOMID_SELF ) - sd = rcu_lock_current_domain(); - else if ( (sd = rcu_lock_domain_by_id(op->source.domid)) == NULL ) - PIN_FAIL(error_out, GNTST_bad_domain, - "couldn't find %d\n", op->source.domid); - - if ( op->dest.domid == DOMID_SELF ) - dd = rcu_lock_current_domain(); - else if ( (dd = rcu_lock_domain_by_id(op->dest.domid)) == NULL ) - PIN_FAIL(error_out, GNTST_bad_domain, - "couldn't find %d\n", op->dest.domid); + if ( domid == DOMID_SELF ) + buf->domain = rcu_lock_current_domain(); + else + { + buf->domain = rcu_lock_domain_by_id(domid); + if ( buf->domain == NULL ) + PIN_FAIL(out, GNTST_bad_domain, "couldn't find %d\n", domid); + } - rc = xsm_grant_copy(XSM_HOOK, sd, dd); - if ( rc ) + buf->ptr.domid = domid; + rc = GNTST_okay; + out: + return rc; +} + +static void gnttab_copy_unlock_domains(struct gnttab_copy_buf *src, + struct gnttab_copy_buf *dest) +{ + if ( src->domain ) + { + rcu_unlock_domain(src->domain); + src->domain = NULL; + } + if ( dest->domain ) + { + rcu_unlock_domain(dest->domain); + dest->domain = NULL; + } +} + +static int gnttab_copy_lock_domains(const struct gnttab_copy *op, + struct gnttab_copy_buf *src, + struct gnttab_copy_buf *dest) +{ + int rc; + + rc = gnttab_copy_lock_domain(op->source.domid, + op->flags & GNTCOPY_source_gref, src); + if ( rc < 0 ) + goto 
error; + rc = gnttab_copy_lock_domain(op->dest.domid, + op->flags & GNTCOPY_dest_gref, dest); + if ( rc < 0 ) + goto error; + + rc = xsm_grant_copy(XSM_HOOK, src->domain, dest->domain); + if ( rc < 0 ) { rc = GNTST_permission_denied; - goto error_out; + goto error; } + return 0; + + error: + gnttab_copy_unlock_domains(src, dest); + return rc; +} - if ( src_is_gref ) +static void gnttab_copy_release_buf(struct gnttab_copy_buf *buf) +{ + if ( buf->virt ) { - uint16_t source_off, source_len; - rc = __acquire_grant_for_copy(sd, op->source.u.ref, - current->domain->domain_id, 1, - &s_frame, &s_pg, - &source_off, &source_len, 1); - if ( rc != GNTST_okay ) - goto error_out; - have_s_grant = 1; - if ( op->source.offset < source_off || - op->len > source_len ) - PIN_FAIL(error_out, GNTST_general_error, - "copy source out of bounds: %d < %d || %d > %d\n", - op->source.offset, source_off, - op->len, source_len); + unmap_domain_page(buf->virt); + buf->virt = NULL; } - else + if ( buf->have_type ) { - rc = __get_paged_frame(op->source.u.gmfn, &s_frame, &s_pg, 1, sd); - if ( rc != GNTST_okay ) - PIN_FAIL(error_out, rc, - "source frame %lx invalid.\n", s_frame); + put_page_type(buf->page); + buf->have_type = 0; + } + if ( buf->page ) + { + put_page(buf->page); + buf->page = NULL; + } + if ( buf->have_grant ) + { + __release_grant_for_copy(buf->domain, buf->ptr.u.ref, buf->read_only); + buf->have_grant = 0; } +} + +static int gnttab_copy_claim_buf(const struct gnttab_copy *op, + const struct gnttab_copy_ptr *ptr, + struct gnttab_copy_buf *buf, + unsigned int gref_flag) +{ + int rc; + + buf->read_only = gref_flag == GNTCOPY_source_gref; - if ( dest_is_gref ) + if ( op->flags & gref_flag ) { - uint16_t dest_off, dest_len; - rc = __acquire_grant_for_copy(dd, op->dest.u.ref, - current->domain->domain_id, 0, - &d_frame, &d_pg, &dest_off, &dest_len, 1); + rc = __acquire_grant_for_copy(buf->domain, ptr->u.ref, + current->domain->domain_id, + buf->read_only, + &buf->frame, &buf->page, + 
&buf->ptr.offset, &buf->len, 1); if ( rc != GNTST_okay ) - goto error_out; - have_d_grant = 1; - if ( op->dest.offset < dest_off || - op->len > dest_len ) - PIN_FAIL(error_out, GNTST_general_error, - "copy dest out of bounds: %d < %d || %d > %d\n", - op->dest.offset, dest_off, - op->len, dest_len); + goto out; + buf->ptr.u.ref = ptr->u.ref; + buf->have_grant = 1; } else { - rc = __get_paged_frame(op->dest.u.gmfn, &d_frame, &d_pg, 0, dd); + rc = __get_paged_frame(ptr->u.gmfn, &buf->frame, &buf->page, + buf->read_only, buf->domain); if ( rc != GNTST_okay ) - PIN_FAIL(error_out, rc, - "destination frame %lx invalid.\n", d_frame); + PIN_FAIL(out, rc, + "source frame %lx invalid.\n", ptr->u.gmfn); + + buf->ptr.u.gmfn = ptr->u.gmfn; + buf->ptr.offset = 0; + buf->len = PAGE_SIZE; } - if ( !get_page_type(d_pg, PGT_writable_page) ) + if ( !buf->read_only ) { - if ( !dd->is_dying ) - gdprintk(XENLOG_WARNING, "Could not get dst frame %lx\n", d_frame); - rc = GNTST_general_error; - goto error_out; - } - - sp = map_domain_page(s_frame); - dp = map_domain_page(d_frame); - - memcpy(dp + op->dest.offset, sp + op->source.offset, op->len); - - unmap_domain_page(dp); - unmap_domain_page(sp); - - gnttab_mark_dirty(dd, d_frame); - - put_page_type(d_pg); - error_out: - if ( d_pg ) - put_page(d_pg); - if ( s_pg ) - put_page(s_pg); - if ( have_s_grant ) - __release_grant_for_copy(sd, op->source.u.ref, 1); - if ( have_d_grant ) - __release_grant_for_copy(dd, op->dest.u.ref, 0); - if ( sd ) - rcu_unlock_domain(sd); - if ( dd ) - rcu_unlock_domain(dd); - op->status = rc; + if ( !get_page_type(buf->page, PGT_writable_page) ) + { + if ( !buf->domain->is_dying ) + gdprintk(XENLOG_WARNING, "Could not get writable frame %lx\n", buf->frame); + rc = GNTST_general_error; + goto out; + } + buf->have_type = 1; + } + + buf->virt = map_domain_page(buf->frame); + rc = GNTST_okay; + + out: + return rc; } -static long -gnttab_copy( +static int gnttab_copy_buf(const struct gnttab_copy *op, + struct 
gnttab_copy_buf *dest, + const struct gnttab_copy_buf *src) +{ + int rc; + + if ( ((op->source.offset + op->len) > PAGE_SIZE) || + ((op->dest.offset + op->len) > PAGE_SIZE) ) + PIN_FAIL(out, GNTST_bad_copy_arg, "copy beyond page area.\n"); + + if ( op->source.offset < src->ptr.offset || + op->source.offset + op->len > src->ptr.offset + src->len ) + PIN_FAIL(out, GNTST_general_error, + "copy source out of bounds: %d < %d || %d > %d\n", + op->source.offset, src->ptr.offset, + op->len, src->len); + + if ( op->dest.offset < dest->ptr.offset || + op->dest.offset + op->len > dest->ptr.offset + dest->len ) + PIN_FAIL(out, GNTST_general_error, + "copy dest out of bounds: %d < %d || %d > %d\n", + op->dest.offset, dest->ptr.offset, + op->len, dest->len); + + memcpy(dest->virt + op->dest.offset, src->virt + op->source.offset, + op->len); + gnttab_mark_dirty(dest->domain, dest->frame); + rc = GNTST_okay; + out: + return rc; +} + +static int gnttab_copy_one(const struct gnttab_copy *op, + struct gnttab_copy_buf *dest, + struct gnttab_copy_buf *src) +{ + int rc; + + rc = gnttab_copy_lock_domains(op, src, dest); + if ( rc < 0 ) + goto out; + + rc = gnttab_copy_claim_buf(op, &op->source, src, GNTCOPY_source_gref); + if ( rc < 0 ) + goto out; + + rc = gnttab_copy_claim_buf(op, &op->dest, dest, GNTCOPY_dest_gref); + if ( rc < 0 ) + goto out; + + rc = gnttab_copy_buf(op, dest, src); + out: + gnttab_copy_release_buf(src); + gnttab_copy_release_buf(dest); + gnttab_copy_unlock_domains(src, dest); + return rc; +} + +static long gnttab_copy( XEN_GUEST_HANDLE_PARAM(gnttab_copy_t) uop, unsigned int count) { - int i; + unsigned int i; struct gnttab_copy op; + struct gnttab_copy_buf src = {}; + struct gnttab_copy_buf dest = {}; for ( i = 0; i < count; i++ ) { @@ -2217,7 +2308,9 @@ gnttab_copy( return i; if ( unlikely(__copy_from_guest(&op, uop, 1)) ) return -EFAULT; - __gnttab_copy(&op); + + op.status = gnttab_copy_one(&op, &dest, &src); + if ( unlikely(__copy_field_to_guest(uop, &op, 
status)) ) return -EFAULT; guest_handle_add_offset(uop, 1); --- a/xen/include/public/grant_table.h +++ b/xen/include/public/grant_table.h @@ -453,7 +453,7 @@ DEFINE_XEN_GUEST_HANDLE(gnttab_transfer_ struct gnttab_copy { /* IN parameters. */ - struct { + struct gnttab_copy_ptr { union { grant_ref_t ref; xen_pfn_t gmfn; ++++++ 54ca340e-grant-table-defer-releasing-pages-acquired-in-a-grant-copy.patch ++++++ # Commit d28f42f2703e483116bafd2b0b76a32af67d83ad # Date 2015-01-29 14:22:22 +0100 # Author David Vrabel <david.vra...@citrix.com> # Committer Jan Beulich <jbeul...@suse.com> grant-table: defer releasing pages acquired in a grant copy Acquiring a page for the source or destination of a grant copy is an expensive operation. A common use case is for two adjacent grant copy ops to operate on either the same source or the same destination page. Instead of always acquiring and releasing destination and source pages for each operation, release the page once it is no longer valid for the next op. If either the source or destination domains changes both pages are released as it is unlikely that either will still be valid. XenServer's performance benchmarks show modest improvements in network receive throughput (netback uses grant copy in the guest Rx path) and no regressions in disk performance (using tapdisk3 which grant copies as the backend). 
Baseline Deferred Release Interhost receive to VM 7.2 Gb/s ~9 Gbit/s Interhost aggregate 24 Gb/s 28 Gb/s Intrahost single stream 14 Gb/s 14 Gb/s Intrahost aggregate 34 Gb/s 36 Gb/s Aggregate disk write 900 MB/s 900 MB/s Aggregate disk read 890 MB/s 890 MB/s Signed-off-by: David Vrabel <david.vra...@citrix.com> Reviewed-by: Tim Deegan <t...@xen.org> Reviewed-by: Jan Beulich <jbeul...@suse.com> --- a/xen/common/grant_table.c +++ b/xen/common/grant_table.c @@ -2236,6 +2236,17 @@ static int gnttab_copy_claim_buf(const s return rc; } +static bool_t gnttab_copy_buf_valid(const struct gnttab_copy_ptr *p, + const struct gnttab_copy_buf *b, + bool_t has_gref) +{ + if ( !b->virt ) + return 0; + if ( has_gref ) + return b->have_grant && p->u.ref == b->ptr.u.ref; + return p->u.gmfn == b->ptr.u.gmfn; +} + static int gnttab_copy_buf(const struct gnttab_copy *op, struct gnttab_copy_buf *dest, const struct gnttab_copy_buf *src) @@ -2274,23 +2285,40 @@ static int gnttab_copy_one(const struct { int rc; - rc = gnttab_copy_lock_domains(op, src, dest); - if ( rc < 0 ) - goto out; + if ( !src->domain || op->source.domid != src->ptr.domid || + !dest->domain || op->dest.domid != dest->ptr.domid ) + { + gnttab_copy_release_buf(src); + gnttab_copy_release_buf(dest); + gnttab_copy_unlock_domains(src, dest); - rc = gnttab_copy_claim_buf(op, &op->source, src, GNTCOPY_source_gref); - if ( rc < 0 ) - goto out; + rc = gnttab_copy_lock_domains(op, src, dest); + if ( rc < 0 ) + goto out; + } - rc = gnttab_copy_claim_buf(op, &op->dest, dest, GNTCOPY_dest_gref); - if ( rc < 0 ) - goto out; + /* Different source? */ + if ( !gnttab_copy_buf_valid(&op->source, src, + op->flags & GNTCOPY_source_gref) ) + { + gnttab_copy_release_buf(src); + rc = gnttab_copy_claim_buf(op, &op->source, src, GNTCOPY_source_gref); + if ( rc < 0 ) + goto out; + } + + /* Different dest? 
*/ + if ( !gnttab_copy_buf_valid(&op->dest, dest, + op->flags & GNTCOPY_dest_gref) ) + { + gnttab_copy_release_buf(dest); + rc = gnttab_copy_claim_buf(op, &op->dest, dest, GNTCOPY_dest_gref); + if ( rc < 0 ) + goto out; + } rc = gnttab_copy_buf(op, dest, src); out: - gnttab_copy_release_buf(src); - gnttab_copy_release_buf(dest); - gnttab_copy_unlock_domains(src, dest); return rc; } @@ -2301,21 +2329,42 @@ static long gnttab_copy( struct gnttab_copy op; struct gnttab_copy_buf src = {}; struct gnttab_copy_buf dest = {}; + long rc = 0; for ( i = 0; i < count; i++ ) { - if (i && hypercall_preempt_check()) - return i; + if ( i && hypercall_preempt_check() ) + { + rc = i; + break; + } + if ( unlikely(__copy_from_guest(&op, uop, 1)) ) - return -EFAULT; + { + rc = -EFAULT; + break; + } op.status = gnttab_copy_one(&op, &dest, &src); + if ( op.status != GNTST_okay ) + { + gnttab_copy_release_buf(&src); + gnttab_copy_release_buf(&dest); + } if ( unlikely(__copy_field_to_guest(uop, &op, status)) ) - return -EFAULT; + { + rc = -EFAULT; + break; + } guest_handle_add_offset(uop, 1); } - return 0; + + gnttab_copy_release_buf(&src); + gnttab_copy_release_buf(&dest); + gnttab_copy_unlock_domains(&src, &dest); + + return rc; } static long ++++++ 5537a4d8-libxl-use-DEBUG-log-level-instead-of-INFO.patch ++++++ References: bsc#945164 Subject: libxl: use DEBUG log level instead of INFO From: Wei Liu wei.l...@citrix.com Fri Apr 17 12:31:29 2015 +0100 Date: Wed Apr 22 14:40:40 2015 +0100: Git: ddc17f311099c1f0f37a771a2f5f904d848102f7 Make libxl less noisy when destroying a domain. 
Signed-off-by: Wei Liu <wei.l...@citrix.com> Cc: Ian Campbell <ian.campb...@citrix.com> Cc: Ian Jackson <ian.jack...@eu.citrix.com> Acked-by: Ian Campbell <ian.campb...@citrix.com> Index: xen-4.5.1-testing/tools/libxl/libxl.c =================================================================== --- xen-4.5.1-testing.orig/tools/libxl/libxl.c +++ xen-4.5.1-testing/tools/libxl/libxl.c @@ -1688,7 +1688,7 @@ static void devices_destroy_cb(libxl__eg _exit(-1); } } - LOG(INFO, "forked pid %ld for destroy of domain %d", (long)rc, domid); + LOG(DEBUG, "forked pid %ld for destroy of domain %d", (long)rc, domid); return; ++++++ 557eb55f-gnttab-per-active-entry-locking.patch ++++++ --- /var/tmp/diff_new_pack.mHwCXF/_old 2015-09-24 07:17:03.000000000 +0200 +++ /var/tmp/diff_new_pack.mHwCXF/_new 2015-09-24 07:17:03.000000000 +0200 @@ -432,7 +432,7 @@ return rc; } -@@ -2231,7 +2292,6 @@ gnttab_set_version(XEN_GUEST_HANDLE_PARA +@@ -2373,7 +2434,6 @@ gnttab_set_version(XEN_GUEST_HANDLE_PARA gnttab_set_version_t op; struct domain *d = current->domain; struct grant_table *gt = d->grant_table; @@ -440,7 +440,7 @@ grant_entry_v1_t reserved_entries[GNTTAB_NR_RESERVED_ENTRIES]; long res; int i; -@@ -2256,8 +2316,7 @@ gnttab_set_version(XEN_GUEST_HANDLE_PARA +@@ -2398,8 +2458,7 @@ gnttab_set_version(XEN_GUEST_HANDLE_PARA { for ( i = GNTTAB_NR_RESERVED_ENTRIES; i < nr_grant_entries(gt); i++ ) { @@ -450,7 +450,7 @@ { gdprintk(XENLOG_WARNING, "tried to change grant table version from %d to %d, but some grant entries still in use\n", -@@ -2444,7 +2503,8 @@ __gnttab_swap_grant_ref(grant_ref_t ref_ +@@ -2586,7 +2645,8 @@ __gnttab_swap_grant_ref(grant_ref_t ref_ { struct domain *d = rcu_lock_current_domain(); struct grant_table *gt = d->grant_table; @@ -460,7 +460,7 @@ s16 rc = GNTST_okay; spin_lock(&gt->lock); -@@ -2458,12 +2518,16 @@ __gnttab_swap_grant_ref(grant_ref_t ref_ +@@ -2600,12 +2660,16 @@ __gnttab_swap_grant_ref(grant_ref_t ref_ if ( unlikely(ref_b >= nr_grant_entries(d->grant_table))) 
PIN_FAIL(out, GNTST_bad_gntref, "Bad ref-b (%d).\n", ref_b); @@ -481,7 +481,7 @@ PIN_FAIL(out, GNTST_eagain, "ref b %ld busy\n", (long)ref_b); if ( gt->gt_version == 1 ) -@@ -2490,6 +2554,10 @@ __gnttab_swap_grant_ref(grant_ref_t ref_ +@@ -2632,6 +2696,10 @@ __gnttab_swap_grant_ref(grant_ref_t ref_ } out: @@ -492,7 +492,7 @@ spin_unlock(&gt->lock); rcu_unlock_domain(d); -@@ -2799,7 +2867,7 @@ grant_table_create( +@@ -2941,7 +3009,7 @@ grant_table_create( struct domain *d) { struct grant_table *t; @@ -501,7 +501,7 @@ if ( (t = xzalloc(struct grant_table)) == NULL ) goto no_mem_0; -@@ -2818,6 +2886,8 @@ grant_table_create( +@@ -2960,6 +3028,8 @@ grant_table_create( if ( (t->active[i] = alloc_xenheap_page()) == NULL ) goto no_mem_2; clear_page(t->active[i]); @@ -510,7 +510,7 @@ } /* Tracking of mapped foreign frames table */ -@@ -2914,7 +2984,7 @@ gnttab_release_mappings( +@@ -3056,7 +3126,7 @@ gnttab_release_mappings( rgt = rd->grant_table; spin_lock(&rgt->lock); @@ -519,7 +519,7 @@ sha = shared_entry_header(rgt, ref); if (rgt->gt_version == 1) status = &sha->flags; -@@ -2972,6 +3042,7 @@ gnttab_release_mappings( +@@ -3114,6 +3184,7 @@ gnttab_release_mappings( if ( act->pin == 0 ) gnttab_clear_flag(_GTF_reading, status); @@ -527,7 +527,7 @@ spin_unlock(&rgt->lock); rcu_unlock_domain(rd); -@@ -3034,9 +3105,12 @@ static void gnttab_usage_print(struct do +@@ -3176,9 +3247,12 @@ static void gnttab_usage_print(struct do uint16_t status; uint64_t frame; @@ -541,7 +541,7 @@ sha = shared_entry_header(gt, ref); -@@ -3066,6 +3140,7 @@ static void gnttab_usage_print(struct do +@@ -3208,6 +3282,7 @@ static void gnttab_usage_print(struct do printk("[%3d] %5d 0x%06lx 0x%08x %5d 0x%06"PRIx64" 0x%02x\n", ref, act->domid, act->frame, act->pin, sha->domid, frame, status); ++++++ 557eb5b6-gnttab-introduce-maptrack-lock.patch ++++++ --- /var/tmp/diff_new_pack.mHwCXF/_old 2015-09-24 07:17:03.000000000 +0200 +++ /var/tmp/diff_new_pack.mHwCXF/_new 2015-09-24 07:17:03.000000000 +0200 @@ -65,7 
+65,7 @@ return handle; } -@@ -2874,6 +2874,7 @@ grant_table_create( +@@ -3016,6 +3016,7 @@ grant_table_create( /* Simple stuff. */ spin_lock_init(&t->lock); ++++++ 557eb620-gnttab-make-the-grant-table-lock-a-read-write-lock.patch ++++++ --- /var/tmp/diff_new_pack.mHwCXF/_old 2015-09-24 07:17:03.000000000 +0200 +++ /var/tmp/diff_new_pack.mHwCXF/_new 2015-09-24 07:17:03.000000000 +0200 @@ -576,7 +576,7 @@ return rc; } -@@ -2307,7 +2334,7 @@ gnttab_set_version(XEN_GUEST_HANDLE_PARA +@@ -2449,7 +2476,7 @@ gnttab_set_version(XEN_GUEST_HANDLE_PARA if ( gt->gt_version == op.version ) goto out; @@ -585,7 +585,7 @@ /* Make sure that the grant table isn't currently in use when we change the version number, except for the first 8 entries which are allowed to be in use (xenstore/xenconsole keeps them mapped). -@@ -2392,7 +2419,7 @@ gnttab_set_version(XEN_GUEST_HANDLE_PARA +@@ -2534,7 +2561,7 @@ gnttab_set_version(XEN_GUEST_HANDLE_PARA gt->gt_version = op.version; out_unlock: @@ -594,7 +594,7 @@ out: op.version = gt->gt_version; -@@ -2448,7 +2475,7 @@ gnttab_get_status_frames(XEN_GUEST_HANDL +@@ -2590,7 +2617,7 @@ gnttab_get_status_frames(XEN_GUEST_HANDL op.status = GNTST_okay; @@ -603,7 +603,7 @@ for ( i = 0; i < op.nr_frames; i++ ) { -@@ -2457,7 +2484,7 @@ gnttab_get_status_frames(XEN_GUEST_HANDL +@@ -2599,7 +2626,7 @@ gnttab_get_status_frames(XEN_GUEST_HANDL op.status = GNTST_bad_virt_addr; } @@ -612,7 +612,7 @@ out2: rcu_unlock_domain(d); out1: -@@ -2507,7 +2534,7 @@ __gnttab_swap_grant_ref(grant_ref_t ref_ +@@ -2649,7 +2676,7 @@ __gnttab_swap_grant_ref(grant_ref_t ref_ struct active_grant_entry *act_b = NULL; s16 rc = GNTST_okay; @@ -621,7 +621,7 @@ if ( gt->gt_version == 0 ) PIN_FAIL(out, GNTST_general_error, "grant table not yet set up\n"); -@@ -2558,7 +2585,7 @@ out: +@@ -2700,7 +2727,7 @@ out: active_entry_release(act_b); if ( act_a != NULL ) active_entry_release(act_a); @@ -630,7 +630,7 @@ rcu_unlock_domain(d); -@@ -2629,12 +2656,12 @@ static int 
__gnttab_cache_flush(gnttab_c +@@ -2771,12 +2798,12 @@ static int __gnttab_cache_flush(gnttab_c if ( d != owner ) { @@ -645,7 +645,7 @@ rcu_unlock_domain(d); put_page(page); return ret; -@@ -2654,7 +2681,7 @@ static int __gnttab_cache_flush(gnttab_c +@@ -2796,7 +2823,7 @@ static int __gnttab_cache_flush(gnttab_c ret = 0; if ( d != owner ) @@ -654,7 +654,7 @@ unmap_domain_page(v); put_page(page); -@@ -2873,7 +2900,7 @@ grant_table_create( +@@ -3015,7 +3042,7 @@ grant_table_create( goto no_mem_0; /* Simple stuff. */ @@ -663,7 +663,7 @@ spin_lock_init(&t->maptrack_lock); t->nr_grant_frames = INITIAL_NR_GRANT_FRAMES; -@@ -2983,7 +3010,7 @@ gnttab_release_mappings( +@@ -3125,7 +3152,7 @@ gnttab_release_mappings( } rgt = rd->grant_table; @@ -672,7 +672,7 @@ act = active_entry_acquire(rgt, ref); sha = shared_entry_header(rgt, ref); -@@ -3044,7 +3071,7 @@ gnttab_release_mappings( +@@ -3186,7 +3213,7 @@ gnttab_release_mappings( gnttab_clear_flag(_GTF_reading, status); active_entry_release(act); @@ -681,7 +681,7 @@ rcu_unlock_domain(rd); -@@ -3092,7 +3119,7 @@ static void gnttab_usage_print(struct do +@@ -3234,7 +3261,7 @@ static void gnttab_usage_print(struct do printk(" -------- active -------- -------- shared --------\n"); printk("[ref] localdom mfn pin localdom gmfn flags\n"); @@ -690,7 +690,7 @@ if ( gt->gt_version == 0 ) goto out; -@@ -3145,7 +3172,7 @@ static void gnttab_usage_print(struct do +@@ -3287,7 +3314,7 @@ static void gnttab_usage_print(struct do } out: ++++++ 5583da09-x86-MSI-track-host-and-guest-masking-separately.patch ++++++ --- /var/tmp/diff_new_pack.mHwCXF/_old 2015-09-24 07:17:03.000000000 +0200 +++ /var/tmp/diff_new_pack.mHwCXF/_new 2015-09-24 07:17:03.000000000 +0200 @@ -137,7 +137,7 @@ spin_unlock_irqrestore(&desc->lock, flags); --- a/xen/arch/x86/irq.c +++ b/xen/arch/x86/irq.c -@@ -2502,6 +2502,25 @@ int unmap_domain_pirq_emuirq(struct doma +@@ -2503,6 +2503,25 @@ int unmap_domain_pirq_emuirq(struct doma return ret; } ++++++ 
5583da64-gnttab-use-per-VCPU-maptrack-free-lists.patch ++++++ --- /var/tmp/diff_new_pack.mHwCXF/_old 2015-09-24 07:17:03.000000000 +0200 +++ /var/tmp/diff_new_pack.mHwCXF/_new 2015-09-24 07:17:03.000000000 +0200 @@ -188,7 +188,7 @@ spin_unlock(&lgt->maptrack_lock); -@@ -2919,16 +2961,9 @@ grant_table_create( +@@ -3061,16 +3103,9 @@ grant_table_create( } /* Tracking of mapped foreign frames table */ @@ -207,7 +207,7 @@ /* Shared grant table. */ if ( (t->shared_raw = xzalloc_array(void *, max_grant_frames)) == NULL ) -@@ -2960,8 +2995,7 @@ grant_table_create( +@@ -3102,8 +3137,7 @@ grant_table_create( free_xenheap_page(t->shared_raw[i]); xfree(t->shared_raw); no_mem_3: @@ -217,7 +217,7 @@ no_mem_2: for ( i = 0; i < num_act_frames_from_sha_frames(INITIAL_NR_GRANT_FRAMES); i++ ) -@@ -3096,7 +3130,7 @@ grant_table_destroy( +@@ -3238,7 +3272,7 @@ grant_table_destroy( for ( i = 0; i < nr_maptrack_frames(t); i++ ) free_xenheap_page(t->maptrack[i]); @@ -226,7 +226,7 @@ for ( i = 0; i < nr_active_grant_frames(t); i++ ) free_xenheap_page(t->active[i]); -@@ -3110,6 +3144,12 @@ grant_table_destroy( +@@ -3252,6 +3286,12 @@ grant_table_destroy( d->grant_table = NULL; } ++++++ 55dc78e9-x86-amd_ucode-skip-updates-for-final-levels.patch ++++++ # Commit 22c5675877c8209adcfdb6bceddb561320374529 # Date 2015-08-25 16:17:13 +0200 # Author Aravind Gopalakrishnan <aravind.gopalakrish...@amd.com> # Committer Jan Beulich <jbeul...@suse.com> x86, amd_ucode: skip microcode updates for final levels Some of older[Fam10h] systems require that certain number of applied microcode patch levels should not be overwritten by the microcode loader. Otherwise, system hangs are known to occur. The 'final_levels' of patch ids have been obtained empirically. Refer bug https://bugzilla.suse.com/show_bug.cgi?id=913996 for details of the issue. The short version is that people have predominantly noticed system hang issues when trying to update microcode levels beyond the patch IDs below. 
[0x01000098, 0x0100009f, 0x010000af] >From internal discussions, we gathered that OS/hypervisor cannot reliably perform microcode updates beyond these levels due to hardware issues. Therefore, we need to abort microcode update process if we hit any of these levels. In this patch, we check for those microcode versions and abort if the current core has one of those final patch levels applied by the BIOS A linux version of the patch has already made it into tip- http://marc.info/?l=linux-kernel&m=143703405627170 Signed-off-by: Aravind Gopalakrishnan <aravind.gopalakrish...@amd.com> Reviewed-by: Andrew Cooper <andrew.coop...@citrix.com> Reviewed-by: Boris Ostrovsky <boris.ostrov...@oracle.com> --- a/xen/arch/x86/microcode_amd.c +++ b/xen/arch/x86/microcode_amd.c @@ -347,6 +347,43 @@ static int container_fast_forward(const return 0; } +/* + * The 'final_levels' of patch ids have been obtained empirically. + * Refer bug https://bugzilla.suse.com/show_bug.cgi?id=913996 + * for details of the issue. The short version is that people + * using certain Fam10h systems noticed system hang issues when + * trying to update microcode levels beyond the patch IDs below. + * From internal discussions, we gathered that OS/hypervisor + * cannot reliably perform microcode updates beyond these levels + * due to hardware issues. Therefore, we need to abort microcode + * update process if we hit any of these levels. + */ +static const unsigned int final_levels[] = { + 0x01000098, + 0x0100009f, + 0x010000af +}; + +static bool_t check_final_patch_levels(unsigned int cpu) +{ + /* + * Check the current patch levels on the cpu. If they are equal to + * any of the 'final_levels', then we should not update the microcode + * patch on the cpu as system will hang otherwise. 
+ */ + struct ucode_cpu_info *uci = &per_cpu(ucode_cpu_info, cpu); + unsigned int i; + + if ( boot_cpu_data.x86 != 0x10 ) + return 0; + + for ( i = 0; i < ARRAY_SIZE(final_levels); i++ ) + if ( uci->cpu_sig.rev == final_levels[i] ) + return 1; + + return 0; +} + static int cpu_request_microcode(int cpu, const void *buf, size_t bufsize) { struct microcode_amd *mc_amd, *mc_old; @@ -369,6 +406,14 @@ static int cpu_request_microcode(int cpu goto out; } + if ( check_final_patch_levels(cpu) ) + { + printk(XENLOG_INFO + "microcode: Cannot update microcode patch on the cpu as we hit a final level\n"); + error = -EPERM; + goto out; + } + mc_amd = xmalloc(struct microcode_amd); if ( !mc_amd ) { ++++++ 55dc7937-x86-IO-APIC-don-t-create-pIRQ-mapping-from-masked-RTE.patch ++++++ # Commit 669d4b85c433674ab3b52ef707af0d3a551c941f # Date 2015-08-25 16:18:31 +0200 # Author Jan Beulich <jbeul...@suse.com> # Committer Jan Beulich <jbeul...@suse.com> x86/IO-APIC: don't create pIRQ mapping from masked RTE While moving our XenoLinux patches to 4.2-rc I noticed bogus "already mapped" messages resulting from Linux (legitimately) writing RTEs with only the mask bit set. Clearly we shouldn't even attempt to create a pIRQ <-> IRQ mapping from such RTEs. In the course of this I also found that the respective message isn't really useful without also printing the pre-existing mapping. And I noticed that map_domain_pirq() allowed IRQ0 to get through, despite us never allowing a domain to control that interrupt. Signed-off-by: Jan Beulich <jbeul...@suse.com> Reviewed-by: Andrew Cooper <andrew.coop...@citrix.com> --- a/xen/arch/x86/io_apic.c +++ b/xen/arch/x86/io_apic.c @@ -2371,9 +2371,14 @@ int ioapic_guest_write(unsigned long phy * pirq and irq mapping. Where the GSI is greater than 256, we assume * that dom0 pirq == irq. */ - pirq = (irq >= 256) ? irq : rte.vector; - if ( (pirq < 0) || (pirq >= hardware_domain->nr_pirqs) ) - return -EINVAL; + if ( !rte.mask ) + { + pirq = (irq >= 256) ? 
irq : rte.vector; + if ( pirq >= hardware_domain->nr_pirqs ) + return -EINVAL; + } + else + pirq = -1; if ( desc->action ) { @@ -2408,12 +2413,15 @@ int ioapic_guest_write(unsigned long phy printk(XENLOG_INFO "allocated vector %02x for irq %d\n", ret, irq); } - spin_lock(&hardware_domain->event_lock); - ret = map_domain_pirq(hardware_domain, pirq, irq, - MAP_PIRQ_TYPE_GSI, NULL); - spin_unlock(&hardware_domain->event_lock); - if ( ret < 0 ) - return ret; + if ( pirq >= 0 ) + { + spin_lock(&hardware_domain->event_lock); + ret = map_domain_pirq(hardware_domain, pirq, irq, + MAP_PIRQ_TYPE_GSI, NULL); + spin_unlock(&hardware_domain->event_lock); + if ( ret < 0 ) + return ret; + } spin_lock_irqsave(&ioapic_lock, flags); /* Set the correct irq-handling type. */ --- a/xen/arch/x86/irq.c +++ b/xen/arch/x86/irq.c @@ -1906,7 +1906,7 @@ int map_domain_pirq( if ( !irq_access_permitted(current->domain, irq)) return -EPERM; - if ( pirq < 0 || pirq >= d->nr_pirqs || irq < 0 || irq >= nr_irqs ) + if ( pirq < 0 || pirq >= d->nr_pirqs || irq <= 0 || irq >= nr_irqs ) { dprintk(XENLOG_G_ERR, "dom%d: invalid pirq %d or irq %d\n", d->domain_id, pirq, irq); @@ -1919,8 +1919,9 @@ int map_domain_pirq( if ( (old_irq > 0 && (old_irq != irq) ) || (old_pirq && (old_pirq != pirq)) ) { - dprintk(XENLOG_G_WARNING, "dom%d: pirq %d or irq %d already mapped\n", - d->domain_id, pirq, irq); + dprintk(XENLOG_G_WARNING, + "dom%d: pirq %d or irq %d already mapped (%d,%d)\n", + d->domain_id, pirq, irq, old_pirq, old_irq); return 0; } ++++++ 55df2f76-IOMMU-skip-domains-without-page-tables-when-dumping.patch ++++++ # Commit 5f335544cf5b716b0af51223e33373c4a7d65e8c # Date 2015-08-27 17:40:38 +0200 # Author Jan Beulich <jbeul...@suse.com> # Committer Jan Beulich <jbeul...@suse.com> IOMMU: skip domains without page tables when dumping Reported-by: Roger Pau Monné <roger....@citrix.com> Signed-off-by: Jan Beulich <jbeul...@suse.com> Tested-by: Roger Pau Monné <roger....@citrix.com> --- 
a/xen/drivers/passthrough/iommu.c +++ b/xen/drivers/passthrough/iommu.c @@ -368,7 +368,7 @@ static void iommu_dump_p2m_table(unsigne ops = iommu_get_ops(); for_each_domain(d) { - if ( is_hardware_domain(d) ) + if ( is_hardware_domain(d) || need_iommu(d) <= 0 ) continue; if ( iommu_use_hap_pt(d) ) ++++++ 55e43fd8-x86-NUMA-fix-setup_node.patch ++++++ # Commit 8f945d36d9bddd5b589ba23c7322b30d623dd084 # Date 2015-08-31 13:51:52 +0200 # Author Jan Beulich <jbeul...@suse.com> # Committer Jan Beulich <jbeul...@suse.com> x86/NUMA: fix setup_node() The function referenced an __initdata object (nodes_found). Since this being a node mask was more complicated than needed, the variable gets replaced by a simple counter. Check at once that the count of nodes doesn't go beyond MAX_NUMNODES. Also consolidate three printk()s related to the function's use into just one. Finally (quite the opposite of the above issue) __init-annotate nodes_cover_memory(). Signed-off-by: Jan Beulich <jbeul...@suse.com> Reviewed-by: Andrew Cooper <andrew.coop...@citrix.com> --- a/xen/arch/x86/srat.c +++ b/xen/arch/x86/srat.c @@ -25,7 +25,6 @@ static struct acpi_table_slit *__read_mo static nodemask_t memory_nodes_parsed __initdata; static nodemask_t processor_nodes_parsed __initdata; -static nodemask_t nodes_found __initdata; static struct node nodes[MAX_NUMNODES] __initdata; static u8 __read_mostly pxm2node[256] = { [0 ... 
255] = NUMA_NO_NODE }; @@ -45,17 +44,25 @@ int pxm_to_node(int pxm) return (signed char)pxm2node[pxm]; } -__devinit int setup_node(int pxm) +int setup_node(int pxm) { unsigned node = pxm2node[pxm]; - if (node == 0xff) { - if (nodes_weight(nodes_found) >= MAX_NUMNODES) + + if (node == NUMA_NO_NODE) { + static bool_t warned; + static unsigned nodes_found; + + node = nodes_found++; + if (node >= MAX_NUMNODES) { + printk(KERN_WARNING + "SRAT: Too many proximity domains (%#x)\n", + pxm); + warned = 1; return -1; - node = first_unset_node(nodes_found); - node_set(node, nodes_found); + } pxm2node[pxm] = node; } - return pxm2node[pxm]; + return node; } int valid_numa_range(u64 start, u64 end, int node) @@ -176,7 +183,6 @@ acpi_numa_x2apic_affinity_init(struct ac pxm = pa->proximity_domain; node = setup_node(pxm); if (node < 0) { - printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm); bad_srat(); return; } @@ -209,7 +215,6 @@ acpi_numa_processor_affinity_init(struct } node = setup_node(pxm); if (node < 0) { - printk(KERN_ERR "SRAT: Too many proximity domains %x\n", pxm); bad_srat(); return; } @@ -253,7 +258,6 @@ acpi_numa_memory_affinity_init(struct ac pxm &= 0xff; node = setup_node(pxm); if (node < 0) { - printk(KERN_ERR "SRAT: Too many proximity domains.\n"); bad_srat(); return; } @@ -295,7 +299,7 @@ acpi_numa_memory_affinity_init(struct ac /* Sanity check to catch more bad SRATs (they are amazingly common). Make sure the PXMs cover all memory. */ -static int nodes_cover_memory(void) +static int __init nodes_cover_memory(void) { int i; ++++++ 55e43ff8-x86-NUMA-don-t-account-hotplug-regions.patch ++++++ # Commit c011f470e6e79208f5baa071b4d072b78c88e2ba # Date 2015-08-31 13:52:24 +0200 # Author Jan Beulich <jbeul...@suse.com> # Committer Jan Beulich <jbeul...@suse.com> x86/NUMA: don't account hotplug regions ... except in cases where they really matter: node_memblk_range[] now is the only place all regions get stored. 
nodes[] and NODE_DATA() track present memory only. This improves the reporting when nodes have disjoint "normal" and hotplug regions, with the hotplug region sitting above the highest populated page. In such cases a node's spanned-pages value (visible in both XEN_SYSCTL_numainfo and 'u' debug key output) covered all the way up to top of populated memory, giving quite a different picture from what an otherwise identically configured system without any hotplug regions would report. Note, however, that the actual hotplug case (as well as cases of nodes with multiple disjoint present regions) is still not being handled such that the reported values would represent how much memory a node really has (but that can be considered intentional). Reported-by: Jim Fehlig <jfeh...@suse.com> This at once makes nodes_cover_memory() no longer consider E820_RAM regions covered by SRAT hotplug regions. Also reject self-overlaps with mismatching hotplug flags. Signed-off-by: Jan Beulich <jbeul...@suse.com> Reviewed-by: Andrew Cooper <andrew.coop...@citrix.com> Tested-by: Jim Fehlig <jfeh...@suse.com> --- a/xen/arch/x86/srat.c +++ b/xen/arch/x86/srat.c @@ -32,7 +32,7 @@ static u8 __read_mostly pxm2node[256] = static int num_node_memblks; static struct node node_memblk_range[NR_NODE_MEMBLKS]; static int memblk_nodeid[NR_NODE_MEMBLKS]; - +static __initdata DECLARE_BITMAP(memblk_hotplug, NR_NODE_MEMBLKS); static int node_to_pxm(int n); @@ -89,9 +89,9 @@ static __init int conflicting_memblks(u6 if (nd->start == nd->end) continue; if (nd->end > start && nd->start < end) - return memblk_nodeid[i]; + return i; if (nd->end == end && nd->start == start) - return memblk_nodeid[i]; + return i; } return -1; } @@ -229,7 +229,6 @@ acpi_numa_processor_affinity_init(struct void __init acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) { - struct node *nd; u64 start, end; int node, pxm; int i; @@ -263,30 +262,40 @@ acpi_numa_memory_affinity_init(struct ac } /* It is fine to add this area
to the nodes data it will be used later*/ i = conflicting_memblks(start, end); - if (i == node) { - printk(KERN_WARNING - "SRAT: Warning: PXM %d (%"PRIx64"-%"PRIx64") overlaps with itself (%" - PRIx64"-%"PRIx64")\n", pxm, start, end, nodes[i].start, nodes[i].end); - } else if (i >= 0) { + if (i < 0) + /* everything fine */; + else if (memblk_nodeid[i] == node) { + bool_t mismatch = !(ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) != + !test_bit(i, memblk_hotplug); + + printk("%sSRAT: PXM %u (%"PRIx64"-%"PRIx64") overlaps with itself (%"PRIx64"-%"PRIx64")\n", + mismatch ? KERN_ERR : KERN_WARNING, pxm, start, end, + node_memblk_range[i].start, node_memblk_range[i].end); + if (mismatch) { + bad_srat(); + return; + } + } else { printk(KERN_ERR - "SRAT: PXM %d (%"PRIx64"-%"PRIx64") overlaps with PXM %d (%" - PRIx64"-%"PRIx64")\n", pxm, start, end, node_to_pxm(i), - nodes[i].start, nodes[i].end); + "SRAT: PXM %u (%"PRIx64"-%"PRIx64") overlaps with PXM %u (%"PRIx64"-%"PRIx64")\n", + pxm, start, end, node_to_pxm(memblk_nodeid[i]), + node_memblk_range[i].start, node_memblk_range[i].end); bad_srat(); return; } - nd = &nodes[node]; - if (!node_test_and_set(node, memory_nodes_parsed)) { - nd->start = start; - nd->end = end; - } else { - if (start < nd->start) + if (!(ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE)) { + struct node *nd = &nodes[node]; + + if (!node_test_and_set(node, memory_nodes_parsed)) { nd->start = start; - if (nd->end < end) nd->end = end; + } else { + if (start < nd->start) + nd->start = start; + if (nd->end < end) + nd->end = end; + } } - if ((ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) && end > mem_hotplug) - mem_hotplug = end; printk(KERN_INFO "SRAT: Node %u PXM %u %"PRIx64"-%"PRIx64"%s\n", node, pxm, start, end, ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE ? 
" (hotplug)" : ""); @@ -294,6 +303,11 @@ acpi_numa_memory_affinity_init(struct ac node_memblk_range[num_node_memblks].start = start; node_memblk_range[num_node_memblks].end = end; memblk_nodeid[num_node_memblks] = node; + if (ma->flags & ACPI_SRAT_MEM_HOT_PLUGGABLE) { + __set_bit(num_node_memblks, memblk_hotplug); + if (end > mem_hotplug) + mem_hotplug = end; + } num_node_memblks++; } ++++++ 55e593f1-x86-NUMA-make-init_node_heap-respect-Xen-heap-limit.patch ++++++ # Commit 88e3ed61642bb393458acc7a9bd2f96edc337190 # Date 2015-09-01 14:02:57 +0200 # Author Jan Beulich <jbeul...@suse.com> # Committer Jan Beulich <jbeul...@suse.com> x86/NUMA: make init_node_heap() respect Xen heap limit On NUMA systems, where we try to use node local memory for the basic control structures of the buddy allocator, this special case needs to take into consideration a possible address width limit placed on the Xen heap. In turn this (but also other, more abstract considerations) requires that xenheap_max_mfn() not be called more than once (at most we might permit it to be called a second time with a larger value than was passed the first time), and be called only before calling end_boot_allocator(). 
While inspecting all the involved code, a couple of off-by-one issues were found (and are being corrected here at once): - arch_init_memory() cleared one too many page table slots - the highmem_start based invocation of xenheap_max_mfn() passed too big a value - xenheap_max_mfn() calculated the wrong bit count in edge cases Signed-off-by: Jan Beulich <jbeul...@suse.com> Reviewed-by: Andrew Cooper <andrew.coop...@citrix.com> Acked-by: Ian Campbell <ian.campb...@citrix.com> Release-acked-by: Wei Liu <wei.l...@citrix.com> # Commit 0a7167d9b20cdc48e6ea320fbbb920b3267c9757 # Date 2015-09-04 14:58:07 +0100 # Author Julien Grall <julien.gr...@citrix.com> # Committer Ian Campbell <ian.campb...@citrix.com> xen/arm64: do not (incorrectly) limit size of xenheap The commit 88e3ed61642bb393458acc7a9bd2f96edc337190 "x86/NUMA: make init_node_heap() respect Xen heap limit" breaks boot on the arm64 board X-Gene. The xenheap bits variable is used to know the last RAM MFN always mapped in Xen virtual memory. If the value is 0, it means that all the memory is always mapped in Xen virtual memory. On X-gene the RAM bank resides above 128GB and last xenheap MFN is 0x4400000. With the new way to calculate the number of bits, xenheap_bits will be equal to 38 bits. This results in all the RAM being hidden and makes it impossible to allocate xenheap memory. Given that aarch64 always has all the memory mapped in Xen virtual memory, it's not necessary to call xenheap_max_mfn, which sets the number of bits. Suggested-by: Jan Beulich <jbeul...@suse.com> Signed-off-by: Julien Grall <julien.gr...@citrix.com> Acked-by: Ian Campbell <ian.campb...@citrix.com> --- a/xen/arch/arm/setup.c +++ b/xen/arch/arm/setup.c @@ -664,7 +664,6 @@ static void __init setup_mm(unsigned lon xenheap_virt_end = XENHEAP_VIRT_START + ram_end - ram_start; xenheap_mfn_start = ram_start >> PAGE_SHIFT; xenheap_mfn_end = ram_end >> PAGE_SHIFT; - xenheap_max_mfn(xenheap_mfn_end); /* * Need enough mapped pages for copying the DTB.
--- a/xen/arch/x86/mm.c +++ b/xen/arch/x86/mm.c @@ -372,7 +372,7 @@ void __init arch_init_memory(void) for ( i = 0; i < l3_table_offset(split_va); ++i ) l3tab[i] = l3idle[i]; - for ( ; i <= L3_PAGETABLE_ENTRIES; ++i ) + for ( ; i < L3_PAGETABLE_ENTRIES; ++i ) l3tab[i] = l3e_empty(); split_l4e = l4e_from_pfn(virt_to_mfn(l3tab), __PAGE_HYPERVISOR); --- a/xen/arch/x86/setup.c +++ b/xen/arch/x86/setup.c @@ -970,7 +970,7 @@ void __init noreturn __start_xen(unsigne setup_max_pdx(raw_max_page); if ( highmem_start ) - xenheap_max_mfn(PFN_DOWN(highmem_start)); + xenheap_max_mfn(PFN_DOWN(highmem_start - 1)); /* * Walk every RAM region and map it in its entirety (on x86/64, at least) @@ -1151,9 +1151,6 @@ void __init noreturn __start_xen(unsigne numa_initmem_init(0, raw_max_page); - end_boot_allocator(); - system_state = SYS_STATE_boot; - if ( max_page - 1 > virt_to_mfn(HYPERVISOR_VIRT_END - 1) ) { unsigned long limit = virt_to_mfn(HYPERVISOR_VIRT_END - 1); @@ -1162,6 +1159,8 @@ void __init noreturn __start_xen(unsigne if ( !highmem_start ) xenheap_max_mfn(limit); + end_boot_allocator(); + /* Pass the remaining memory to the allocator. */ for ( i = 0; i < boot_e820.nr_map; i++ ) { @@ -1185,6 +1184,10 @@ void __init noreturn __start_xen(unsigne opt_tmem = 0; } } + else + end_boot_allocator(); + + system_state = SYS_STATE_boot; vm_init(); console_init_ring(); --- a/xen/common/page_alloc.c +++ b/xen/common/page_alloc.c @@ -405,13 +405,19 @@ void get_outstanding_claims(uint64_t *fr spin_unlock(&heap_lock); } +static bool_t __read_mostly first_node_initialised; +#ifndef CONFIG_SEPARATE_XENHEAP +static unsigned int __read_mostly xenheap_bits; +#else +#define xenheap_bits 0 +#endif + static unsigned long init_node_heap(int node, unsigned long mfn, unsigned long nr, bool_t *use_tail) { /* First node to be discovered has its heap metadata statically alloced. 
*/ static heap_by_zone_and_order_t _heap_static; static unsigned long avail_static[NR_ZONES]; - static int first_node_initialised; unsigned long needed = (sizeof(**_heap) + sizeof(**avail) * NR_ZONES + PAGE_SIZE - 1) >> PAGE_SHIFT; @@ -429,14 +435,18 @@ static unsigned long init_node_heap(int } #ifdef DIRECTMAP_VIRT_END else if ( *use_tail && nr >= needed && - (mfn + nr) <= (virt_to_mfn(eva - 1) + 1) ) + (mfn + nr) <= (virt_to_mfn(eva - 1) + 1) && + (!xenheap_bits || + !((mfn + nr - 1) >> (xenheap_bits - PAGE_SHIFT))) ) { _heap[node] = mfn_to_virt(mfn + nr - needed); avail[node] = mfn_to_virt(mfn + nr - 1) + PAGE_SIZE - sizeof(**avail) * NR_ZONES; } else if ( nr >= needed && - (mfn + needed) <= (virt_to_mfn(eva - 1) + 1) ) + (mfn + needed) <= (virt_to_mfn(eva - 1) + 1) && + (!xenheap_bits || + !((mfn + needed - 1) >> (xenheap_bits - PAGE_SHIFT))) ) { _heap[node] = mfn_to_virt(mfn); avail[node] = mfn_to_virt(mfn + needed - 1) + @@ -1541,11 +1551,13 @@ void free_xenheap_pages(void *v, unsigne #else -static unsigned int __read_mostly xenheap_bits; - void __init xenheap_max_mfn(unsigned long mfn) { - xenheap_bits = fls(mfn) + PAGE_SHIFT; + ASSERT(!first_node_initialised); + ASSERT(!xenheap_bits); + BUILD_BUG_ON(PADDR_BITS >= BITS_PER_LONG); + xenheap_bits = min(fls(mfn + 1) - 1 + PAGE_SHIFT, PADDR_BITS); + printk(XENLOG_INFO "Xen heap: %u bits\n", xenheap_bits); } void init_xenheap_pages(paddr_t ps, paddr_t pe) ++++++ CVE-2015-5239-qemut-limit-client_cut_text-msg-payload-size.patch ++++++ References: bsc#944463 Subject: ui/vnc: limit client_cut_text msg payload size From: Peter Lieven p...@kamp.de Mon Jun 30 10:07:54 2014 +0200 Date: Tue Jul 1 13:26:40 2014 +0200: Git: f9a70e79391f6d7c2a912d785239ee8effc1922d currently a malicious client could define a payload size of 2^32 - 1 bytes and send up to that size of data to the vnc server. The server would allocate that amount of memory which could easily create an out of memory condition.
This patch limits the payload size to 1MB max. Please note that client_cut_text messages are currently silently ignored. Signed-off-by: Peter Lieven <p...@kamp.de> Signed-off-by: Gerd Hoffmann <kra...@redhat.com> Index: xen-4.5.1-testing/tools/qemu-xen-traditional-dir-remote/vnc.c =================================================================== --- xen-4.5.1-testing.orig/tools/qemu-xen-traditional-dir-remote/vnc.c +++ xen-4.5.1-testing/tools/qemu-xen-traditional-dir-remote/vnc.c @@ -1779,14 +1779,21 @@ static int protocol_client_msg(VncState pointer_event(vs, read_u8(data, 1), read_u16(data, 2), read_u16(data, 4)); break; case 6: - if (len == 1) + if (len == 1) { return 8; - + } if (len == 8) { uint32_t v; v = read_u32(data, 4); - if (v) + if (v > (1 << 20)) { + VNC_DEBUG("vnc: client_cut_text msg payload has %u bytes" + " which exceeds our limit of 1MB.", v); + vnc_client_error(vs); + break; + } + if (v > 0) { return 8 + v; + } } client_cut_text(vs, read_u32(data, 4), (char *)(data + 8)); ++++++ CVE-2015-5239-qemuu-limit-client_cut_text-msg-payload-size.patch ++++++ References: bsc#944463 Subject: ui/vnc: limit client_cut_text msg payload size From: Peter Lieven p...@kamp.de Mon Jun 30 10:07:54 2014 +0200 Date: Tue Jul 1 13:26:40 2014 +0200: Git: f9a70e79391f6d7c2a912d785239ee8effc1922d currently a malicious client could define a payload size of 2^32 - 1 bytes and send up to that size of data to the vnc server. The server would allocate that amount of memory which could easily create an out of memory condition. This patch limits the payload size to 1MB max. Please note that client_cut_text messages are currently silently ignored.
Signed-off-by: Peter Lieven <p...@kamp.de> Signed-off-by: Gerd Hoffmann <kra...@redhat.com> Index: xen-4.5.1-testing/tools/qemu-xen-dir-remote/ui/vnc.c =================================================================== --- xen-4.5.1-testing.orig/tools/qemu-xen-dir-remote/ui/vnc.c +++ xen-4.5.1-testing/tools/qemu-xen-dir-remote/ui/vnc.c @@ -2149,13 +2149,20 @@ static int protocol_client_msg(VncState pointer_event(vs, read_u8(data, 1), read_u16(data, 2), read_u16(data, 4)); break; case VNC_MSG_CLIENT_CUT_TEXT: - if (len == 1) + if (len == 1) { return 8; - + } if (len == 8) { uint32_t dlen = read_u32(data, 4); - if (dlen > 0) + if (dlen > (1 << 20)) { + error_report("vnc: client_cut_text msg payload has %u bytes" + " which exceeds our limit of 1MB.", dlen); + vnc_client_error(vs); + break; + } + if (dlen > 0) { return 8 + dlen; + } } client_cut_text(vs, read_u32(data, 4), data + 8); ++++++ CVE-2015-6815-qemut-e1000-fix-infinite-loop.patch ++++++ References: bsc#944697 From: P J P <address@hidden> While processing transmit descriptors, it could lead to an infinite loop if 'bytes' was to become zero; Add a check to avoid it. [The guest can force 'bytes' to 0 by setting the hdr_len and mss descriptor fields to 0. 
--Stefan] Signed-off-by: P J P <address@hidden> Signed-off-by: Stefan Hajnoczi <address@hidden> --- hw/net/e1000.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) Index: xen-4.5.1-testing/tools/qemu-xen-traditional-dir-remote/hw/e1000.c =================================================================== --- xen-4.5.1-testing.orig/tools/qemu-xen-traditional-dir-remote/hw/e1000.c +++ xen-4.5.1-testing/tools/qemu-xen-traditional-dir-remote/hw/e1000.c @@ -470,7 +470,8 @@ process_tx_desc(E1000State *s, struct e1 memmove(tp->data, tp->header, hdr); tp->size = hdr; } - } while (split_size -= bytes); + split_size -= bytes; + } while (bytes && split_size); } else if (!tp->tse && tp->cptse) { // context descriptor TSE is not set, while data descriptor TSE is set DBGOUT(TXERR, "TCP segmentaion Error\n"); ++++++ CVE-2015-6815-qemuu-e1000-fix-infinite-loop.patch ++++++ References: bsc#944697 From: P J P <address@hidden> While processing transmit descriptors, it could lead to an infinite loop if 'bytes' was to become zero; Add a check to avoid it. [The guest can force 'bytes' to 0 by setting the hdr_len and mss descriptor fields to 0. 
--Stefan] Signed-off-by: P J P <address@hidden> Signed-off-by: Stefan Hajnoczi <address@hidden> --- hw/net/e1000.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) Index: xen-4.5.1-testing/tools/qemu-xen-dir-remote/hw/net/e1000.c =================================================================== --- xen-4.5.1-testing.orig/tools/qemu-xen-dir-remote/hw/net/e1000.c +++ xen-4.5.1-testing/tools/qemu-xen-dir-remote/hw/net/e1000.c @@ -707,7 +707,8 @@ process_tx_desc(E1000State *s, struct e1 memmove(tp->data, tp->header, tp->hdr_len); tp->size = tp->hdr_len; } - } while (split_size -= bytes); + split_size -= bytes; + } while (bytes && split_size); } else if (!tp->tse && tp->cptse) { // context descriptor TSE is not set, while data descriptor TSE is set DBGOUT(TXERR, "TCP segmentation error\n"); ++++++ pci-attach-fix.patch ++++++ >From 9bfb923a855388bb38f7f57b4881bc888a04f9b5 Mon Sep 17 00:00:00 2001 From: Chunyan Liu <cy...@suse.com> Date: Mon, 14 Sep 2015 14:45:37 +0800 Subject: [PATCH] pci-attach: fix assertation run "xl pci-attach <domain> <pci_device>", the 2nd time fails: xl: libxl_xshelp.c:209: libxl__xs_transaction_start: Assertion `!*t' failed. Aborted To fix that, initialize xs_transaction to avoid libxl__xs_transaction_start assertion error. 
Signed-off-by: Chunyan Liu <cy...@suse.com> --- tools/libxl/libxl_pci.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/libxl/libxl_pci.c b/tools/libxl/libxl_pci.c index 1ebdce7..19c597e 100644 --- a/tools/libxl/libxl_pci.c +++ b/tools/libxl/libxl_pci.c @@ -123,7 +123,7 @@ static int libxl__device_pci_add_xenstore(libxl__gc *gc, uint32_t domid, libxl_d flexarray_t *back; char *num_devs, *be_path; int num = 0; - xs_transaction_t t; + xs_transaction_t t = XBT_NULL; libxl__device *device; int rc; libxl_domain_config d_config; -- 2.1.4 ++++++ xen.build-compare.xen_compile_h.patch ++++++ --- /var/tmp/diff_new_pack.mHwCXF/_old 2015-09-24 07:17:04.000000000 +0200 +++ /var/tmp/diff_new_pack.mHwCXF/_new 2015-09-24 07:17:04.000000000 +0200 @@ -1,8 +1,6 @@ Use stable strings to reduce build-compare noise. -Index: xen-4.5.1-testing/xen/Makefile -=================================================================== ---- xen-4.5.1-testing.orig/xen/Makefile -+++ xen-4.5.1-testing/xen/Makefile +--- a/xen/Makefile ++++ b/xen/Makefile @@ -8,6 +8,9 @@ export XEN_FULLVERSION = $(XEN_VERSION export XEN_WHOAMI ?= $(USER)