On Sat, 2020-10-10 at 12:54 +0200, Borislav Petkov wrote:
> On Fri, Oct 09, 2020 at 11:01:18AM -0700, Joe Perches wrote:
> > Given the location, this only works on .c and .h files.
> > It does not work on .S files.  Should it?
> 
> Probably not because there will be too many false positives. .byte is
> used not only to spell instruction opcodes in .S files. And the main
> case we're addressing here is using those .byte spelled opcodes in asm
> volatile constructs so...

[]

> > So it looks like the regex would be more complete as:
> > 
> >     if ($realfile =~ m@^arch/x86/@ &&
> >         $rawline =~ /\.byte\s+(?:$Constant|(?:\\)?$Ident|"\s*$Ident)\b/) {
> 
> This is much less readable than what I have now (yes, realfile test
> should come first):

I care less about readability than completeness, correctness,
and minimal false positives.

>       if ($realfile =~ m@^arch/x86/@ && $rawline =~ /\.byte[\s0-9a-fx,]+/) {
> 
> Also, this
> 
>       $rawline =~ /\.byte\s+(?:$Constant|(?:\\)?$Ident|"\s*$Ident)\b/) {
> 
> matches
> 
>       ".byte 0x66"

And? So does the regex you propose.

> This
> 
>       $rawline =~ /\.byte[\s0-9a-fx,]+)) {
> 
> matches
> 
>       ".byte 0x66, 0x0f, 0x38, 0xf8, 0x02"
> 
> which is what it needs to match.

Is that the only case you are trying to match?
two or more "0x<hex>," ?

Then this could use:

/"\s*\.byte\s+(?:0x[0-9a-fA-F]{1,2}\s*,\s*){2,4}/

$ git grep -P '"\s*\.byte\s+(?:0x[0-9a-fA-F]{1,2}\s*,\s*){2,4}' arch/x86
arch/x86/include/asm/intel_pconfig.h:#define PCONFIG ".byte 0x0f, 0x01, 0xc5"
arch/x86/include/asm/mwait.h:   asm volatile(".byte 0x0f, 0x01, 0xc8;"
arch/x86/include/asm/mwait.h:   asm volatile(".byte 0x0f, 0x01, 0xfa;"
arch/x86/include/asm/mwait.h:   asm volatile(".byte 0x0f, 0x01, 0xc9;"
arch/x86/include/asm/mwait.h:   asm volatile(".byte 0x0f, 0x01, 0xfb;"
arch/x86/include/asm/mwait.h:   asm volatile(".byte 0x66, 0x0f, 0xae, 0xf1\t\n"
arch/x86/include/asm/segment.h:                 ".byte 0xf3,0x0f,0xc7,0xf8", /* 
RDPID %eax/rax */
arch/x86/include/asm/smap.h:#define __ASM_CLAC  ".byte 0x0f,0x01,0xca"
arch/x86/include/asm/smap.h:#define __ASM_STAC  ".byte 0x0f,0x01,0xcb"
arch/x86/include/asm/special_insns.h:   asm volatile(".byte 0x0f,0x01,0xee\n\t"
arch/x86/include/asm/special_insns.h:   asm volatile(".byte 0x0f,0x01,0xef\n\t"
arch/x86/include/asm/special_insns.h:           ".byte 0x66, 0x0f, 0xae, 0x30", 
 /* clwb (%%rax) */
arch/x86/include/asm/special_insns.h:   asm volatile(".byte 0x66, 0x0f, 0x38, 
0xf8, 0x02"
arch/x86/include/asm/special_insns.h:   asm volatile(".byte 0xf3, 0x0f, 0x38, 
0xf8, 0x02, 0x66, 0x90"
arch/x86/include/asm/special_insns.h:   asm volatile(".byte 0xf, 0x1, 0xe8" ::: 
"memory");

vs:

$ git grep -P '\.byte[\s0-9a-fx,]+' -- 'arch/x86/*.[ch]' | cat
arch/x86/include/asm/alternative-asm.h: .byte \orig_len
arch/x86/include/asm/alternative-asm.h: .byte \alt_len
arch/x86/include/asm/alternative-asm.h: .byte \pad_len
arch/x86/include/asm/alternative.h:     " .byte " alt_total_slen "\n"           
        /* source len      */ \
arch/x86/include/asm/alternative.h:     " .byte " alt_rlen(num) "\n"            
        /* replacement len */ \
arch/x86/include/asm/alternative.h:     " .byte " alt_pad_len "\n"              
        /* pad len */
arch/x86/include/asm/bug.h:#define ASM_UD0              ".byte 0x0f, 0xff" /* + 
ModRM (for Intel) */
arch/x86/include/asm/bug.h:#define ASM_UD1              ".byte 0x0f, 0xb9" /* + 
ModRM */
arch/x86/include/asm/bug.h:#define ASM_UD2              ".byte 0x0f, 0x0b"
arch/x86/include/asm/cpufeature.h:               " .byte 3b - 1b\n"             
/* src len */
arch/x86/include/asm/cpufeature.h:               " .byte 5f - 4f\n"             
/* repl len */
arch/x86/include/asm/cpufeature.h:               " .byte 3b - 2b\n"             
/* pad len */
arch/x86/include/asm/cpufeature.h:               " .byte 3b - 1b\n"             
/* src len */
arch/x86/include/asm/cpufeature.h:               " .byte 0\n"                   
/* repl len */
arch/x86/include/asm/cpufeature.h:               " .byte 0\n"                   
/* pad len */
arch/x86/include/asm/fpu/internal.h:#define XSAVE               ".byte " 
REX_PREFIX "0x0f,0xae,0x27"
arch/x86/include/asm/fpu/internal.h:#define XSAVEOPT    ".byte " REX_PREFIX 
"0x0f,0xae,0x37"
arch/x86/include/asm/fpu/internal.h:#define XSAVES              ".byte " 
REX_PREFIX "0x0f,0xc7,0x2f"
arch/x86/include/asm/fpu/internal.h:#define XRSTOR              ".byte " 
REX_PREFIX "0x0f,0xae,0x2f"
arch/x86/include/asm/fpu/internal.h:#define XRSTORS             ".byte " 
REX_PREFIX "0x0f,0xc7,0x1f"
arch/x86/include/asm/idtentry.h: * Note, that the 'pushq imm8' is emitted via 
'.byte 0x6a, vector' because
arch/x86/include/asm/idtentry.h: * .byte achieves the same thing and the only 
fixup needed in the C entry
arch/x86/include/asm/idtentry.h:        .byte   0x6a, vector
arch/x86/include/asm/idtentry.h:        .byte   0x6a, vector
arch/x86/include/asm/inst.h: * Generate .byte code for some instructions not 
supported by old
arch/x86/include/asm/inst.h:    .byte 0x40 | ((\opd1 & 8) >> 3) | ((\opd2 & 8) 
>> 1) | (\W << 3)
arch/x86/include/asm/inst.h:    .byte \mod | (\opd1 & 7) | ((\opd2 & 7) << 3)
arch/x86/include/asm/inst.h:    .byte 0xf3
arch/x86/include/asm/inst.h:    .byte 0x0f, 0xc7
arch/x86/include/asm/intel_pconfig.h:#define PCONFIG ".byte 0x0f, 0x01, 0xc5"
arch/x86/include/asm/jump_label.h:              ".byte " 
__stringify(STATIC_KEY_INIT_NOP) "\n\t"
arch/x86/include/asm/jump_label.h:              ".byte 0xe9\n\t .long %l[l_yes] 
- 2f\n\t"
arch/x86/include/asm/jump_label.h:      .byte           0xe9
arch/x86/include/asm/jump_label.h:      .byte           STATIC_KEY_INIT_NOP
arch/x86/include/asm/jump_label.h:      .byte           STATIC_KEY_INIT_NOP
arch/x86/include/asm/jump_label.h:      .byte           0xe9
arch/x86/include/asm/mwait.h:   asm volatile(".byte 0x0f, 0x01, 0xc8;"
arch/x86/include/asm/mwait.h:   asm volatile(".byte 0x0f, 0x01, 0xfa;"
arch/x86/include/asm/mwait.h:   asm volatile(".byte 0x0f, 0x01, 0xc9;"
arch/x86/include/asm/mwait.h:   asm volatile(".byte 0x0f, 0x01, 0xfb;"
arch/x86/include/asm/mwait.h:   asm volatile("sti; .byte 0x0f, 0x01, 0xc9;"
arch/x86/include/asm/mwait.h:   asm volatile(".byte 0x66, 0x0f, 0xae, 0xf1\t\n"
arch/x86/include/asm/nops.h:#define _ASM_MK_NOP(x) .byte x
arch/x86/include/asm/nops.h:#define _ASM_MK_NOP(x) ".byte " __stringify(x) "\n"
arch/x86/include/asm/paravirt.h:         .byte ptype;                           
\
arch/x86/include/asm/paravirt.h:         .byte 772b-771b;                       
\
arch/x86/include/asm/paravirt_types.h:  "  .byte " type "\n"                    
        \
arch/x86/include/asm/paravirt_types.h:  "  .byte 772b-771b\n"                   
        \
arch/x86/include/asm/segment.h:                 ".byte 0xf3,0x0f,0xc7,0xf8", /* 
RDPID %eax/rax */
arch/x86/include/asm/smap.h:#define __ASM_CLAC  ".byte 0x0f,0x01,0xca"
arch/x86/include/asm/smap.h:#define __ASM_STAC  ".byte 0x0f,0x01,0xcb"
arch/x86/include/asm/special_insns.h:   asm volatile(".byte 0x0f,0x01,0xee\n\t"
arch/x86/include/asm/special_insns.h:   asm volatile(".byte 0x0f,0x01,0xef\n\t"
arch/x86/include/asm/special_insns.h:   alternative_io(".byte " 
__stringify(NOP_DS_PREFIX) "; clflush %P0",
arch/x86/include/asm/special_insns.h:                  ".byte 0x66; clflush 
%P0",
arch/x86/include/asm/special_insns.h:           ".byte " 
__stringify(NOP_DS_PREFIX) "; clflush (%[pax])",
arch/x86/include/asm/special_insns.h:           ".byte 0x66; clflush (%[pax])", 
/* clflushopt (%%rax) */
arch/x86/include/asm/special_insns.h:           ".byte 0x66, 0x0f, 0xae, 0x30", 
 /* clwb (%%rax) */
arch/x86/include/asm/special_insns.h:   asm volatile(".byte 0x66, 0x0f, 0x38, 
0xf8, 0x02"
arch/x86/include/asm/special_insns.h:   asm volatile(".byte 0xf3, 0x0f, 0x38, 
0xf8, 0x02, 0x66, 0x90"
arch/x86/include/asm/special_insns.h:   asm volatile(".byte 0xf, 0x1, 0xe8" ::: 
"memory");
arch/x86/include/asm/static_call.h:     __ARCH_DEFINE_STATIC_CALL_TRAMP(name, 
".byte 0xe9; .long " #func " - (. + 4)")
arch/x86/include/asm/xen/interface.h:#define XEN_EMULATE_PREFIX 
__ASM_FORM(.byte __XEN_EMULATE_PREFIX ;)
arch/x86/kvm/vmx/ops.h:              ".byte 0x3e\n\t" /* branch taken hint */
arch/x86/kvm/vmx/ops.h:                   ".byte 0x2e\n\t" /* branch not taken 
hint */  \
arch/x86/kvm/vmx/ops.h:                   ".byte 0x2e\n\t" /* branch not taken 
hint */  \
arch/x86/realmode/rm/realmode.h:#define LJMPW_RM(to)    .byte 0xea ; .word 
(to), real_mode_seg

and if lines like:

arch/x86/include/asm/inst.h: * Generate .byte code for some instructions not 
supported by old
arch/x86/include/asm/paravirt_types.h:  "  .byte " type "\n"                    
        \

are false positives then perhaps change the regex
to add required spaces after the \.byte before the
match.



Reply via email to