On 1/14/19 9:01 PM, H. Peter Anvin wrote:
>
> This could be as simple as spinning for a limited time waiting for
> states 0 or 3 if we are not the patching CPU. It is also not necessary
> to wait for the mask to become zero for the first sync if we find
> ourselves suddenly in state 4.
>
So this would look something like this for the #BP handler; I think this
is safe. This uses the TLB miss on the write page intentionally to slow
down the loop a bit to reduce the risk of livelock. Note that
"bp_write_addr" here refers to the write address for the breakpoint that
was taken.

    state = atomic_read(&bp_poke_state);
    if (state == 0)
        return 0;               /* No patching in progress */

recheck:
    clear bit in mask

    switch (state) {
    case 1:
    case 4:
        if (smp_processor_id() != bp_patching_cpu) {
            int retries = NNN;
            while (retries--) {
                invlpg
                if (*bp_write_addr != 0xcc)
                    goto recheck;
                state = atomic_read(&bp_poke_state);
                if (state != 1 && state != 4)
                    goto recheck;
            }
        }
        state = cmpxchg(&bp_poke_state, 1, 4);
        if (state != 1 && state != 4)
            goto recheck;
        atomic_write(bp_write_addr, bp_old_value);
        break;

    case 2:
        if (smp_processor_id() != bp_patching_cpu) {
            invlpg
            state = atomic_read(&bp_poke_state);
            if (state != 2)
                goto recheck;
        }
        complete patch sequence
        remove breakpoint
        break;

    case 3:
    case 0:
        /*
         * If we are here, the #BP will go away on its
         * own, or we will re-take it if it was a "real"
         * breakpoint.
         */
        break;
    }
    return 1;