Hi,

This is continuation from matrix chat. There is an occasional failure on
qubes-hw2 gitlab runner that console become stuck during boot. I can now
reproduce it _much_ more often on another system, and the serial console output
ends with:

    (XEN) Allocated console ring of 256 KiB.
    (XEN) Using HWP for cpufreq
    (XEN) mwait-idle: does not run on family 6

It should be:

    (XEN) Allocated console ring of 256 KiB.
    (XEN) Using HWP for cpufreq
    (XEN) mwait-idle: does not run on family 6 model 183
    (XEN) VMX: Supported advanced features:
    (XEN)  - APIC MMIO access virtualisation
    (XEN)  - APIC TPR shadow
    ...


Otherwise the system works perfectly fine, the logs are available in
full via `xl dmesg` etc. Doing (any?) `xl debug-key` unblocks the
console and missing logs gets dumped there too. I narrowed it down to
the serial console tx buffer and collected some info with the attacked
patch (it collects info still during boot, after the place where it
usually breaks). When it works, I get:

    (XEN) SERIAL DEBUG: txbufc: 0x5b5, txbufp: 0x9f7, uart intr_works: 1, 
serial_txbufsz: 0x4000, tx_ready: 0, lsr_mask: 0x20, msi: 0, io_size: 8, 
skipped_interrupts: 0

And when it breaks, I get:

    (XEN) SERIAL DEBUG: txbufc: 0x70, txbufp: 0x9fd, uart intr_works: 1, 
serial_txbufsz: 0x4000, tx_ready: 16, lsr_mask: 0x20, msi: 0, io_size: 8, 
skipped_interrupts: 0

So, I haven't found yet why it stops sending data. I'll continue adding
some debug prints etc, but if anyone has some ideas what is going on,
I'd appreciate hints.


Full `xl dmesg` of a broken case can be seen here:
https://openqa.qubes-os.org/tests/89100/logfile?filename=serial_terminal.txt
Similarly, example working case is here:
https://openqa.qubes-os.org/tests/89099/logfile?filename=serial0.txt

-- 
Best Regards,
Marek Marczykowski-Górecki
Invisible Things Lab
diff --git a/xen/arch/x86/pv/dom0_build.c b/xen/arch/x86/pv/dom0_build.c
index 5bbed3a36a..39ad935266 100644
--- a/xen/arch/x86/pv/dom0_build.c
+++ b/xen/arch/x86/pv/dom0_build.c
@@ -354,6 +354,8 @@ static struct page_info * __init alloc_chunk(struct domain 
*d,
     return page;
 }
 
+extern void serial_debug(void);
+
 int __init dom0_construct_pv(struct domain *d,
                              const module_t *image,
                              unsigned long image_headroom,
@@ -412,6 +414,7 @@ int __init dom0_construct_pv(struct domain *d,
     /* Machine address of next candidate page-table page. */
     paddr_t mpt_alloc;
 
+    serial_debug();
     printk(XENLOG_INFO "*** Building a PV Dom%d ***\n", d->domain_id);
 
     d->max_pages = ~0U;
diff --git a/xen/drivers/char/serial.c b/xen/drivers/char/serial.c
index 324024c29a..d6a4d62a07 100644
--- a/xen/drivers/char/serial.c
+++ b/xen/drivers/char/serial.c
@@ -71,6 +71,80 @@ void serial_rx_interrupt(struct serial_port *port, struct 
cpu_user_regs *regs)
         (*fn)(c & 0x7f, regs);
 }
 
+#include <xen/console.h>
+#include <xen/init.h>
+#include <xen/irq.h>
+#include <xen/param.h>
+#include <xen/sched.h>
+#include <xen/timer.h>
+#include <xen/serial.h>
+#include <xen/iocap.h>
+#ifdef NS16550_PCI
+#include <xen/pci.h>
+#include <xen/pci_regs.h>
+#include <xen/pci_ids.h>
+#endif
+#include <xen/8250-uart.h>
+#include <xen/vmap.h>
+#include <asm/io.h>
+#ifdef CONFIG_HAS_DEVICE_TREE
+#include <asm/device.h>
+#endif
+#ifdef CONFIG_X86
+#include <asm/fixmap.h>
+#endif
+
+#define NS16550_PCI
+
+struct ns16550 {
+    int baud, clock_hz, data_bits, parity, stop_bits, fifo_size, irq;
+    u64 io_base;   /* I/O port or memory-mapped I/O address. */
+    u64 io_size;
+    int reg_shift; /* Bits to shift register offset by */
+    int reg_width; /* Size of access to use, the registers
+                    * themselves are still bytes */
+    char __iomem *remapped_io_base;  /* Remapped virtual address of MMIO. */
+    /* UART with IRQ line: interrupt-driven I/O. */
+    struct irqaction irqaction;
+    u8 lsr_mask;
+#ifdef CONFIG_ARM
+    struct vuart_info vuart;
+#endif
+    /* UART with no IRQ line: periodically-polled I/O. */
+    struct timer timer;
+    struct timer resume_timer;
+    unsigned int timeout_ms;
+    bool intr_works;
+    bool dw_usr_bsy;
+#ifdef NS16550_PCI
+    /* PCI card parameters. */
+    bool pb_bdf_enable;     /* if =1, pb-bdf effective, port behind bridge */
+    bool ps_bdf_enable;     /* if =1, ps_bdf effective, port on pci card */
+    unsigned int pb_bdf[3]; /* pci bridge BDF */
+    unsigned int ps_bdf[3]; /* pci serial port BDF */
+    u32 bar;
+    u32 bar64;
+    u16 cr;
+    u8 bar_idx;
+    bool msi;
+    const struct ns16550_config_param *param; /* Points into .init.*! */
+#endif
+};
+
+int skipped_interrupts = 0;
+
+void serial_debug(void)
+{
+    struct serial_port *port = &com[SERHND_COM1];
+    struct ns16550 *uart = port->uart;
+    printk("SERIAL DEBUG: txbufc: %#x, txbufp: %#x, uart intr_works: %d, 
serial_txbufsz: %#x, tx_ready: %d, "
+            "lsr_mask: %#x, msi: %d, io_size: %ld, skipped_interrupts: %d\n",
+            port->txbufc, port->txbufp, uart->intr_works,
+            serial_txbufsz,
+            port->driver->tx_ready(port),
+            uart->lsr_mask, uart->msi, uart->io_size, skipped_interrupts);
+}
+
 void serial_tx_interrupt(struct serial_port *port, struct cpu_user_regs *regs)
 {
     int i, n;
@@ -85,8 +159,10 @@ void serial_tx_interrupt(struct serial_port *port, struct 
cpu_user_regs *regs)
      */
     while ( !spin_trylock(&port->tx_lock) )
     {
-        if ( port->driver->tx_ready(port) <= 0 )
+        if ( port->driver->tx_ready(port) <= 0 ) {
+            skipped_interrupts++;
             goto out;
+        }
         cpu_relax();
     }
 

Attachment: signature.asc
Description: PGP signature

Reply via email to