2015-01-30 16:23+0100, Paolo Bonzini:
> On 30/01/2015 16:14, Radim Krčmář wrote:
> > > > +       case KVM_APIC_MODE_XAPIC_FLAT:
> > > > +               *cid = 0;
> > > > +               *lid = ldr & 0xff;
> > > > +               return true;
> > > > +       case KVM_APIC_MODE_XAPIC_CLUSTER:
> > > > +               *cid = (ldr >> 4) & 0xf;
> > > > +               *lid = ldr & 0xf;
> > > > +               return true;
> > > > +       case KVM_APIC_MODE_X2APIC:
> > > > +               *cid = ldr >> 16;
> > > > +               *lid = ldr & 0xffff;
> > > > +               return true;
> > > > +       }
> > 
> >> >  lid_bits = mode;
> >> >  cid_bits = mode & (16 | 4);
> >> >  lid_mask = (1 << lid_bits) - 1;
> >> >  cid_mask = (1 << cid_bits) - 1;
> >> > 
> >> >  *cid = (ldr >> lid_bits) & cid_mask;
> >> >  *lid = ldr & lid_mask;
> > Would jump predictor fail on the switch?  Or is size of the code that
> > important?  This code is shorter, but is going to execute far more
> > operations, so I think it would be slower ... (And harder to read.)
> 
> Considering the additional comparisons for the switch, I don't think
> it's going to execute far more operations...

(A quick assembly comparison [1].)

As optimizations go, we could drop the "&" on cid as "cid < 16" is
checked later, so mode=4 practically does nothing ... Not the best for
future bugs, but still pretty safe -- only x2APIC can set a value that
would require the "&" and it can't have valid XAPIC mode, so u32 still
protects us enough.

  *cid = ldr >> mode;
  *lid = ldr & ((1 << mode) - 1);

Which is probably faster a solution where we don't shuffle switch cases
to jump to x2APIC first.  A comparison is at the very bottom [2].

Would that be ok in v2?


---
1:  To make it easier, it wasn't inlined, sorry.

    There are four parts, first one compares the whole functions and
    three subsequent compare each switch case, with common head/tail
    omitted.  (We don't care about performance when the map is invalid.)

    The optimized version is always first and next to it is the old one.

0000000000001700 <apic_logical_id>:  (ALI from now on)
  1700: callq  1705 <ALI+0x5>     1700: callq  1705 <ALI+0x5>
  1705: push   %rbp               1705: movzbl 0x10(%rdi),%eax
  1706: movzbl 0x10(%rdi),%r8d    1709: push   %rbp
  170b: mov    %rcx,%r10          170a: mov    %rsp,%rbp
  170e: xor    %eax,%eax          170d: cmp    $0x8,%al
  1710: mov    %rsp,%rbp          170f: je     1758 <ALI+0x58>
  1713: lea    -0x1(%r8),%ecx     1711: cmp    $0x10,%al
  1717: test   %r8d,%ecx          1713: je     1740 <ALI+0x40>
  171a: jne    1752 <ALI+0x52>    1715: cmp    $0x4,%al
  171c: mov    %r8d,%edi          1717: je     1720 <ALI+0x20>
  171f: mov    $0x1,%eax          1719: xor    %eax,%eax
  1724: mov    %esi,%r8d          171b: pop    %rbp
  1727: mov    %edi,%ecx          171c: retq   
  1729: mov    %eax,%r9d          171d: nopl   (%rax)
  172c: shr    %cl,%r8d           1720: mov    %esi,%eax
  172f: and    $0x14,%ecx         1722: and    $0xf,%esi
  1732: shl    %cl,%r9d           1725: shr    $0x4,%eax
  1735: mov    %edi,%ecx          1728: and    $0xf,%eax
  1737: shl    %cl,%eax           172b: mov    %ax,(%rdx)
  1739: sub    $0x1,%r9d          172e: mov    $0x1,%eax
  173d: sub    $0x1,%eax          1733: mov    %si,(%rcx)
  1740: and    %r9d,%r8d          1736: pop    %rbp
  1743: and    %esi,%eax          1737: retq   
  1745: mov    %r8w,(%rdx)        1738: nopl   0x0(%rax,%rax,1)
  1749: mov    %ax,(%r10)         173f: 
  174d: mov    $0x1,%eax          1740: mov    %esi,%eax
  1752: pop    %rbp               1742: shr    $0x10,%eax
  1753: retq                      1745: mov    %ax,(%rdx)
                                  1748: mov    $0x1,%eax
                                  174d: mov    %si,(%rcx)
                                  1750: pop    %rbp
                                  1751: retq   
                                  1752: nopw   0x0(%rax,%rax,1)
                                  1758: xor    %eax,%eax
                                  175a: and    $0xff,%si
                                  175f: mov    %ax,(%rdx)
                                  1762: mov    $0x1,%eax
                                  1767: mov    %si,(%rcx)
                                  176a: pop    %rbp
                                  176b: retq   



  170b: mov    %rcx,%r10          170a: mov    %rsp,%rbp
  170e: xor    %eax,%eax          170d: cmp    $0x8,%al
  1710: mov    %rsp,%rbp          170f: je     1758 <ALI+0x58>
  1713: lea    -0x1(%r8),%ecx         [jump]
  1717: test   %r8d,%ecx          1758: xor    %eax,%eax
  171a: jne    1752 <ALI+0x52>    175a: and    $0xff,%si
  171c: mov    %r8d,%edi          175f: mov    %ax,(%rdx)
  171f: mov    $0x1,%eax          1762: mov    $0x1,%eax
  1724: mov    %esi,%r8d          1767: mov    %si,(%rcx)
  1727: mov    %edi,%ecx          
  1729: mov    %eax,%r9d          
  172c: shr    %cl,%r8d           
  172f: and    $0x14,%ecx         
  1732: shl    %cl,%r9d           
  1735: mov    %edi,%ecx          
  1737: shl    %cl,%eax           
  1739: sub    $0x1,%r9d          
  173d: sub    $0x1,%eax          
  1740: and    %r9d,%r8d          
  1743: and    %esi,%eax          
  1745: mov    %r8w,(%rdx)        
  1749: mov    %ax,(%r10)         
  174d: mov    $0x1,%eax          


  170b: mov    %rcx,%r10          170a: mov    %rsp,%rbp
  170e: xor    %eax,%eax          170d: cmp    $0x8,%al
  1710: mov    %rsp,%rbp          170f: je     1758 <ALI+0x58>
  1713: lea    -0x1(%r8),%ecx     1711: cmp    $0x10,%al
  1717: test   %r8d,%ecx          1713: je     1740 <ALI+0x40>
  171a: jne    1752 <ALI+0x52>        [jump]
  171c: mov    %r8d,%edi          1740: mov    %esi,%eax
  171f: mov    $0x1,%eax          1742: shr    $0x10,%eax
  1724: mov    %esi,%r8d          1745: mov    %ax,(%rdx)
  1727: mov    %edi,%ecx          1748: mov    $0x1,%eax
  1729: mov    %eax,%r9d          174d: mov    %si,(%rcx)
  172c: shr    %cl,%r8d           
  172f: and    $0x14,%ecx         
  1732: shl    %cl,%r9d           
  1735: mov    %edi,%ecx          
  1737: shl    %cl,%eax           
  1739: sub    $0x1,%r9d          
  173d: sub    $0x1,%eax          
  1740: and    %r9d,%r8d          
  1743: and    %esi,%eax          
  1745: mov    %r8w,(%rdx)        
  1749: mov    %ax,(%r10)         
  174d: mov    $0x1,%eax          


  170b: mov    %rcx,%r10          170a: mov    %rsp,%rbp        
  170e: xor    %eax,%eax          170d: cmp    $0x8,%al
  1710: mov    %rsp,%rbp          170f: je     1758 <ALI+0x58>
  1713: lea    -0x1(%r8),%ecx     1711: cmp    $0x10,%al
  1717: test   %r8d,%ecx          1713: je     1740 <ALI+0x40>
  171a: jne    1752 <ALI+0x52>    1715: cmp    $0x4,%al
  171c: mov    %r8d,%edi          1717: je     1720 <ALI+0x20>
  171f: mov    $0x1,%eax              [jump]
  1724: mov    %esi,%r8d          1720: mov    %esi,%eax
  1727: mov    %edi,%ecx          1722: and    $0xf,%esi
  1729: mov    %eax,%r9d          1725: shr    $0x4,%eax
  172c: shr    %cl,%r8d           1728: and    $0xf,%eax
  172f: and    $0x14,%ecx         172b: mov    %ax,(%rdx)
  1732: shl    %cl,%r9d           172e: mov    $0x1,%eax
  1735: mov    %edi,%ecx          1733: mov    %si,(%rcx)
  1737: shl    %cl,%eax           
  1739: sub    $0x1,%r9d          
  173d: sub    $0x1,%eax          
  1740: and    %r9d,%r8d          
  1743: and    %esi,%eax          
  1745: mov    %r8w,(%rdx)        
  1749: mov    %ax,(%r10)         
  174d: mov    $0x1,%eax          


2: A comparison of aggressively optimized ALI with the worst case
   scenario, where x2APIC is the third jump.

  16e5: mov    %rcx,%r9           170a: mov    %rsp,%rbp        
  16ed: xor    %eax,%eax          170d: cmp    $0x8,%al
  16ef: mov    %rsp,%rbp          170f: je     1758 <ALI+0x58>
  16f2: lea    -0x1(%rcx),%r8d    1711: cmp    $0x10,%al
  16f6: test   %ecx,%r8d          1713: je     1740 <ALI+0x40>
  16f9: jne    1717 <ALI+0x37>    1715: cmp    $0x4,%al
  16fb: mov    %esi,%eax          1717: je     1720 <ALI+0x20>
  16fd: shr    %cl,%eax               [jump]
  16ff: mov    %ax,(%rdx)         1720: mov    %esi,%eax
  1702: mov    $0x1,%eax          1722: and    $0xf,%esi
  1707: shl    %cl,%eax           1725: shr    $0x4,%eax
  1709: sub    $0x1,%eax          1728: and    $0xf,%eax
  170c: and    %esi,%eax          172b: mov    %ax,(%rdx)
  170e: mov    %ax,(%r9)          172e: mov    $0x1,%eax
  1712: mov    $0x1,%eax          1733: mov    %si,(%rcx)
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to