This became enough of a bother after crashing twice for me to read the
code. I think it's fairly clearly broken, somebody tell me if I misread
something. Grab a sys/dev/pci/drm/drm_edid.c for full context.

So, the crash seems to happen somewhere here, inside the inlined
drm_edid_block_checksum (its unrolled body wasn't fun to follow):

    static int drm_edid_block_checksum(const u8 *raw_edid)
    {
            int i;
            u8 csum = 0, crc = 0;

            for (i = 0; i < EDID_LENGTH - 1; i++)
                    csum += raw_edid[i];

...

This above reads the memory in the range [raw_edid, raw_edid+0x7e). The
invocation of drm_edid_block_checksum is here:

   static void connector_bad_edid(struct drm_connector *connector,
                                  u8 *edid, int num_blocks)
   {
           int i;
           u8 num_of_ext = edid[0x7e];

           if (num_of_ext > num_blocks)
                   num_of_ext = num_blocks;

           /* Calculate real checksum for the last edid extension block data */
           connector->real_edid_checksum =
                   drm_edid_block_checksum(edid + num_of_ext * EDID_LENGTH);

Now, the stack trace below fairly clearly indicates that num_blocks is 1
(the value of %rax compared to the second parameter of
connector_bad_edid corroborates). My reading of drm_do_get_edid
disassembly indicates that connector_bad_edid we call is the one in the
error path after carp, so let read a bit of drm_do_get_edid:

    struct edid *drm_do_get_edid(struct drm_connector *connector,
            int (*get_edid_block)(void *data, u8 *buf, unsigned int block,
                                  size_t len),
            void *data)

            if ((edid = kmalloc(EDID_LENGTH, GFP_KERNEL)) == NULL)
                    return NULL;

            /* base block fetch */
            for (i = 0; i < 4; i++) {
                    if (get_edid_block(data, edid, 0, EDID_LENGTH))
                            goto out;
                    if (drm_edid_block_valid(edid, 0, false,
                                             &connector->edid_corrupt))
                            break;
                    if (i == 0 && drm_edid_is_zero(edid, EDID_LENGTH)) {
                            connector->null_edid_counter++;
                            goto carp;
                    }
            }
            if (i == 4)
                    goto carp;
    ...

    carp:
            connector_bad_edid(connector, edid, 1);

I see that we allocate edid of size EDID_LENGTH and then use
connector_bad_edid to read after the end of that block. The merciful
page fault is the best our kernel can do for this code, no?

I'm inclined to just do this:
-            connector_bad_edid(connector, edid, 1);
+            connector_bad_edid(connector, edid, 0);

Am I wrong? Should there be a fix in the other call of connector_bad_edid?

Thanks
Greg


Greg Steuck <[email protected]> writes:

> I noticed we saw variations of this before. This hit my X1 9th gen when
> waking up with a 4k monitor attached via HDMI connector. FWIW, the
> screen is mirrored between the built-in panel and the big monitor.
>
> The below is largely OCRed and then hand-fixed from
> https://photos.app.goo.gl/qyXdd3e1Nsso921o9
>
> uvm_fault(0xffffffff82167400, 0xffff8000026c7000, 0, 1) -> e
> connector_bad_edid+0x50:  movzbl 0xfffffffffffffffd(%rax, %rcx,1), %esi
> ddb{2}> bt
> connector_bad_edid(ffff800000cde000, ffff8000026c6f80,1) at 
> connector_bad_edid+0x50
> drm_do_get_edid(ffff800000cde000, ffffffff81a00770, ffff8000002b20c8) at 
> drm_do_get_edid+0x378
> drm_get_edid(ffff800000cde000, ffff8000002b20c0) at drm_get_edid+0x67?
> intel_hdmi_set_edid(ffff800000cde000) at intel_hdmi_set_edid+0x69
> intel_encoder_hotplug(ffff800000cd9000, ffff800000cd000,1) at 
> intel_encoder_hotplug+0x7f
> intel_ddi_hotplug(ffff800000cd9000, ffff800000cde000.1) at 
> intel_ddi_hotplug+0x54
> intel_hdmi_detect(ffff800000cd000,0) at intel_hdmi_detect+0xb1
> drm_helper_probe_detect(ffff800000cde000,0.0) at drm_helper_probe_detect+0xed
> i915_hotplug_work_func(ffff8000002b3168) at i915_hotplug_work_func+0x24f
> taskq_thread(ffff8000082aa380) at taskq_thread+0x81
> end trace frame: 0x0, count: -10
> ddb{2}> show registers
> rdi            0xffff800000cde000
> rsi          0xffff8000026c6f80
> rbp          0xffff800021c0c620
> rbx          0xffff8000026c6f80
> rdx          0
> rcx          0x3
> rax          0xffff8000026c7000
> r8           0x200
> r9           0xa               
> r10          0xa146c6f403a26e87
> r11          0x7b0d0680ac61d824
> r12          0xffff8000002b20c0
> r13          0xffffffff81a00770
> r14          0xffff800000cde800
> r15          0x1
> rip          0xffffffff81a005b0   connector_bad_edid+0x50
> cs           0x8
> rflags               0x10246           
> rsp          0xffff800021c0c5b0
> ss           0x10
> connector_bad_edid+0x50:  movzbl 0xfffffffffffffffd(%rax, %rcx,1), %esi
> ddb{2}> show proc
> PROC (draw) pid=476108 stat=onproc
>      flags process=14000<NOZOMBIE, SYSTEM> proc=200<SYSTEM>
>      pri=0, usrpri=59, nice=20
>      forw=0xffffffffffffffff, List-0xffff8000ffffc7e8,0xffff 8000ffffc018
>      process=0xffff800021c063e8 user=0xffff800021c07000, 
> vmspace=0xffffffff821b0388
>      estcpu=9, cpticks=38, pctcpu=0.0
>      user=0, sys=1, intr=0
> ddb{2}>
>
> OpenBSD 6.9-beta (GENERIC.MP) #419: Sat Mar 20 00:43:49 MDT 2021
>     [email protected]:/usr/src/sys/arch/amd64/compile/GENERIC.MP
> real mem = 16903794688 (16120MB)
> avail mem = 16376107008 (15617MB)
> random: good seed from bootblocks
> mpath0 at root
> scsibus0 at mpath0: 256 targets
> mainbus0 at root
> bios0 at mainbus0: SMBIOS rev. 3.2 @ 0x766ac000 (72 entries)
> bios0: vendor LENOVO version "N2WET24W (1.14 )" date 10/15/2020
> bios0: LENOVO 20UAS1H100
> acpi0 at bios0: ACPI 6.1
> acpi0: sleep states S0 S3 S4 S5
> acpi0: tables DSDT FACP SSDT SSDT SSDT SSDT SSDT TPM2 SSDT HPET APIC MCFG 
> ECDT SSDT SSDT SSDT NHLT BOOT SSDT LPIT WSMT SSDT DBGP DBG2 POAT BATB DMAR 
> ASF! BGRT UEFI FPDT
> acpi0: wakeup devices GLAN(S4) XHC_(S3) XDCI(S4) HDAS(S4) RP01(S4) PXSX(S4) 
> RP02(S4) PXSX(S4) PXSX(S4) RP04(S4) PXSX(S4) RP05(S4) PXSX(S4) RP06(S4) 
> PXSX(S4) RP07(S4) [...]
> acpitimer0 at acpi0: 3579545 Hz, 24 bits
> acpihpet0 at acpi0: 23999999 Hz
> acpimadt0 at acpi0 addr 0xfee00000: PC-AT compat
> cpu0 at mainbus0: apid 0 (boot processor)
> cpu0: Intel(R) Core(TM) i7-10610U CPU @ 1.80GHz, 7094.00 MHz, 06-8e-0c
> cpu0: 
> FPU,VME,DE,PSE,TSC,MSR,PAE,MCE,CX8,APIC,SEP,MTRR,PGE,MCA,CMOV,PAT,PSE36,CFLUSH,DS,ACPI,MMX,FXSR,SSE,SSE2,SS,HTT,TM,PBE,SSE3,PCLMUL,DTES64,MWAIT,DS-CPL,VMX,SMX,EST,TM2,SSSE3,SDBG,FMA3,CX16,xTPR,PDCM,PCID,SSE4.1,SSE4.2,x2APIC,MOVBE,POPCNT,DEADLINE,AES,XSAVE,AVX,F16C,RDRAND,NXE,PAGE1GB,RDTSCP,LONG,LAHF,ABM,3DNOWP,PERF,ITSC,FSGSBASE,TSC_ADJUST,SGX,BMI1,AVX2,SMEP,BMI2,ERMS,INVPCID,MPX,RDSEED,ADX,SMAP,CLFLUSHOPT,PT,SRBDS_CTRL,MD_CLEAR,IBRS,IBPB,STIBP,L1DF,SSBD,SENSOR,ARAT,XSAVEOPT,XSAVEC,XGETBV1,XSAVES
> cpu0: 256KB 64b/line 8-way L2 cache
> cpu0: smt 0, core 0, package 0
> mtrr: Pentium Pro MTRR support, 10 var ranges, 88 fixed ranges
> cpu0: apic clock running at 24MHz
> cpu0: mwait min=64, max=64, C-substates=0.2.1.2.4.1.1.1, IBE
> ...
> inteldrm0 at pci0 dev 2 function 0 "Intel UHD Graphics" rev 0x02
> drm0 at inteldrm0
> inteldrm0: msi, COFFEELAKE, gen 9
> ...
> inteldrm0: 3840x2160, 32bpp
> wsdisplay0 at inteldrm0 mux 1: console (std, vt100 emulation), using wskbd0
> wsdisplay0: screen 1-5 added (std, vt100 emulation)
> ...
> drm:pid42157:connector_bad_edid *WARNING* HDMI-A-1: EDID is invalid:
>       [00] BAD  00 ff ff ff ff ff ff 00 ff 00 04 04 72 72 10 04
>       [00] BAD  04 d5 a1 a1 80 80 73 26 26 1b 01 01 03 80 80 47
>       [00] BAD  47 28 78 78 2e 88 88 e5 e5 a8 55 55 4d a0 a0 25
>       [00] BAD  0e 0e 50 50 54 bf bf ef 80 80 71 71 4f 81 81 40
>       [00] BAD  81 81 80 81 81 c0 c0 95 00 00 b3 00 00 d1 d1 c0
>       [00] BAD  01 01 01 4d 4d d0 d0 00 a0 a0 f0 70 70 3e 80 80
>       [00] BAD  30 30 20 35 35 00 c4 c4 8f 8f 21 00 00 00 1a 1a
>       [00] BAD  00 00 00 00 00 fd 00 00 17 4c 4c 1f 1f 87 3c 3c
> ...
> [drm] *ERROR* Unexpected DP dual mode adaptor ID 54
> [drm] *ERROR* Unexpected DP dual mode adaptor ID 4f
> [drm] *ERROR* Unexpected DP dual mode adaptor ID 50
> [drm] *ERROR* Unexpected DP dual mode adaptor ID 54
> [drm] *ERROR* Unexpected DP dual mode adaptor ID 54
> ...
> [drm] *ERROR* Unexpected DP dual mode adaptor ID 50
> ...
> [drm] *ERROR* Unexpected DP dual mode adaptor ID 50
> [drm] *ERROR* Unexpected DP dual mode adaptor ID 4f
> ...
> [drm] *ERROR* Unexpected DP dual mode adaptor ID 54
> ...
> [drm] *ERROR* Unexpected DP dual mode adaptor ID 54
> [drm] *ERROR* Unexpected DP dual mode adaptor ID 54
> ...
> uvm_fault(0xffffffff82167400, 0xffff8000026c7000, 0, 1) -> e
>
>
> The disassembly of the function is:
>
> 00000000000037c0 <connector_bad_edid>:
>     37c0:       4c 8b 1d 00 00 00 00    mov    0(%rip),%r11        # 37c7 
> <connector_bad_edid+0x7>
>     37c7:       4c 33 1c 24             xor    (%rsp),%r11
>     37cb:       55                      push   %rbp
>     37cc:       48 89 e5                mov    %rsp,%rbp
>     37cf:       57                      push   %rdi
>     37d0:       56                      push   %rsi
>     37d1:       52                      push   %rdx
>     37d2:       57                      push   %rdi
>     37d3:       41 53                   push   %r11
>     37d5:       41 57                   push   %r15
>     37d7:       41 56                   push   %r14
>     37d9:       41 55                   push   %r13
>     37db:       41 54                   push   %r12
>     37dd:       53                      push   %rbx
>     37de:       48 83 ec 20             sub    $0x20,%rsp
>     37e2:       41 89 d7                mov    %edx,%r15d
>     37e5:       0f b6 46 7e             movzbl 0x7e(%rsi),%eax
>     37e9:       39 d0                   cmp    %edx,%eax
>     37eb:       0f 4f c2                cmovg  %edx,%eax
>     37ee:       0f b6 c0                movzbl %al,%eax
>     37f1:       48 c1 e0 07             shl    $0x7,%rax
>     37f5:       48 89 75 a8             mov    %rsi,0xffffffffffffffa8(%rbp)
>     37f9:       48 01 f0                add    %rsi,%rax
>     37fc:       31 d2                   xor    %edx,%edx
>     37fe:       b9 03 00 00 00          mov    $0x3,%ecx
>     3803:       eb 0b                   jmp    3810 <connector_bad_edid+0x50>
>     3805:       cc                      int3   
>     3806:       cc                      int3   
>     3807:       cc                      int3   
>     3808:       cc                      int3   
>     3809:       cc                      int3   
>     380a:       cc                      int3   
>     380b:       cc                      int3   
>     380c:       cc                      int3   
>     380d:       cc                      int3   
>     380e:       cc                      int3   
>     380f:       cc                      int3   
>     3810:       0f b6 74 08 fd          movzbl 
> 0xfffffffffffffffd(%rax,%rcx,1),%esi
>     3815:       01 d6                   add    %edx,%esi
>     3817:       0f b6 54 08 fe          movzbl 
> 0xfffffffffffffffe(%rax,%rcx,1),%edx
>     381c:       01 f2                   add    %esi,%edx
>     381e:       0f b6 f2                movzbl %dl,%esi
>     3821:       0f b6 54 08 ff          movzbl 
> 0xffffffffffffffff(%rax,%rcx,1),%edx
>     3826:       01 f2                   add    %esi,%edx
>     3828:       48 83 f9 7f             cmp    $0x7f,%rcx
>     382c:       74 0f                   je     383d <connector_bad_edid+0x7d>
>     382e:       0f b6 34 08             movzbl (%rax,%rcx,1),%esi
>     3832:       01 f2                   add    %esi,%edx
>     3834:       0f b6 d2                movzbl %dl,%edx
>     3837:       48 83 c1 04             add    $0x4,%rcx
>     383b:       eb d3                   jmp    3810 <connector_bad_edid+0x50>
>     383d:       f6 da                   neg    %dl
>     383f:       88 97 19 04 00 00       mov    %dl,0x419(%rdi)
>     3845:       8b 87 14 04 00 00       mov    0x414(%rdi),%eax
>     384b:       8d 48 01                lea    0x1(%rax),%ecx
>     384e:       89 8f 14 04 00 00       mov    %ecx,0x414(%rdi)
>     3854:       85 c0                   test   %eax,%eax
>     3856:       74 0d                   je     3865 <connector_bad_edid+0xa5>
> };
>
> static inline bool drm_debug_enabled(enum drm_debug_category category)
> {
>         return unlikely(__drm_debug & category);
>     3858:       f6 05 00 00 00 00 04    testb  $0x4,0(%rip)        # 385f 
> <connector_bad_edid+0x9f>

Reply via email to