[PATCH] tty: fix ldisc flush and termios setting race
From: Min Zhang A race condition can clear tty ldisc icanon bit unintentionally which could stop n_tty from processing received characters. It can occur when tty receiver buffer was full, e.g. 4096 chars received, 8250 serial driver interrupt tried to flush_to_ldisc them, but other shell thread tried to change_termios the tty setting too. Then n_tty_receive_char and n_tty_set_termios both tried to modify n_tty_data fields. Specifically the icanon and its neighbor fields are defines as bits: unsigned char lnext:1, erasing:1, raw:1, real_raw:1, icanon:1; However they are not atomically accessed in the follow race condition: serial8250_handle_irq serail8250_rx_chars tty_flip_buffer_push schdule_work ---> flush_to_ldisc n_tty_receive_buf n_tty_receive_char (holds no lock) ioctl set_termios tty_set_termios n_tty_set_termios (holds termios_mutex) The patch let change_termios to use TTY_LDISC_FLUSHING to prevent flush_to_ldisc from running, and flush_to_ldisc use TTY_FLUSHING to make change_termios wait until the flag is cleared. The patch also replaces flush_to_ldisc's wake_up with wake_up_all, because it can now wake up both TTY_FLUSHING and TTY_FLUSHPENDING on the same tty->read_wait queue. Event waiters need check which event. Signed-off-by: Min Zhang --- drivers/tty/tty_buffer.c | 12 +++- drivers/tty/tty_ioctl.c | 27 ++- 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c index 45d9161..37f4818 100644 --- a/drivers/tty/tty_buffer.c +++ b/drivers/tty/tty_buffer.c @@ -482,6 +482,13 @@ static void flush_to_ldisc(struct work_struct *work) spin_lock_irqsave(>lock, flags); + /* Ldisc change by change_termios can race with ldisc receive_buf, esp + to access N_TTY line discipline field in tty_struct, so we defer */ + if (test_bit(TTY_LDISC_CHANGING, >flags)) { + schedule_delayed_work(>work, 1); + goto out; + } + if (!test_and_set_bit(TTYP_FLUSHING, >iflags)) { struct tty_buffer *head; while ((head = buf->head) != NULL) { @@ -522,8 +529,11 @@ static void flush_to_ldisc(struct work_struct *work) if (test_bit(TTYP_FLUSHPENDING, >iflags)) { __tty_buffer_flush(port); clear_bit(TTYP_FLUSHPENDING, >iflags); - wake_up(>read_wait); } + + /* wake up tasks waiting for TTYP_FLUSHING or TTYP_FLUSHPENDING */ + wake_up_all(>read_wait); +out: spin_unlock_irqrestore(>lock, flags); tty_ldisc_deref(disc); diff --git a/drivers/tty/tty_ioctl.c b/drivers/tty/tty_ioctl.c index 8481b29..36a1bfa 100644 --- a/drivers/tty/tty_ioctl.c +++ b/drivers/tty/tty_ioctl.c @@ -546,8 +546,33 @@ int tty_set_termios(struct tty_struct *tty, struct ktermios *new_termios) ld = tty_ldisc_ref(tty); if (ld != NULL) { - if (ld->ops->set_termios) + unsigned long flags; + struct tty_port *port = tty->port; + struct tty_bufhead *buf = >buf; + if (!ld->ops->set_termios) + goto no_set_termios; + + /* Wait if the data is being pushed to the tty layer */ + spin_lock_irqsave(>lock, flags); + while (test_bit(TTYP_FLUSHING, >iflags)) { + spin_unlock_irqrestore(>lock, flags); + printk(KERN_CRIT "%s flushing\n", __FUNCTION__); + wait_event(tty->read_wait, + test_bit(TTYP_FLUSHING, >iflags) == 0); + spin_lock_irqsave(>lock, flags); + } + printk(KERN_CRIT "%s survived flushing\n", __FUNCTION__); + + /* Prevent future flush_to_ldisc while we are setting */ + if (!test_and_set_bit(TTY_LDISC_CHANGING, >flags)) { + spin_unlock_irqrestore(>lock, flags); (ld->ops->set_termios)(tty, _termios); + spin_lock_irqsave(>lock, flags); + clear_bit(TTY_LDISC_CHANGING, >flags); + } + spin_unlock_irqrestore(>lock, flags); + +no_set_termios: tty_ldisc_deref(ld); } mutex_unlock(>termios_mutex); -- 1.7.7.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] tty: fix ldisc flush and termios setting race
From: Min Zhang mzh...@mvista.com A race condition can clear tty ldisc icanon bit unintentionally which could stop n_tty from processing received characters. It can occur when tty receiver buffer was full, e.g. 4096 chars received, 8250 serial driver interrupt tried to flush_to_ldisc them, but other shell thread tried to change_termios the tty setting too. Then n_tty_receive_char and n_tty_set_termios both tried to modify n_tty_data fields. Specifically the icanon and its neighbor fields are defines as bits: unsigned char lnext:1, erasing:1, raw:1, real_raw:1, icanon:1; However they are not atomically accessed in the follow race condition: serial8250_handle_irq serail8250_rx_chars tty_flip_buffer_push schdule_work --- flush_to_ldisc n_tty_receive_buf n_tty_receive_char (holds no lock) ioctl set_termios tty_set_termios n_tty_set_termios (holds termios_mutex) The patch let change_termios to use TTY_LDISC_FLUSHING to prevent flush_to_ldisc from running, and flush_to_ldisc use TTY_FLUSHING to make change_termios wait until the flag is cleared. The patch also replaces flush_to_ldisc's wake_up with wake_up_all, because it can now wake up both TTY_FLUSHING and TTY_FLUSHPENDING on the same tty-read_wait queue. Event waiters need check which event. Signed-off-by: Min Zhang mzh...@mvista.com --- drivers/tty/tty_buffer.c | 12 +++- drivers/tty/tty_ioctl.c | 27 ++- 2 files changed, 37 insertions(+), 2 deletions(-) diff --git a/drivers/tty/tty_buffer.c b/drivers/tty/tty_buffer.c index 45d9161..37f4818 100644 --- a/drivers/tty/tty_buffer.c +++ b/drivers/tty/tty_buffer.c @@ -482,6 +482,13 @@ static void flush_to_ldisc(struct work_struct *work) spin_lock_irqsave(buf-lock, flags); + /* Ldisc change by change_termios can race with ldisc receive_buf, esp + to access N_TTY line discipline field in tty_struct, so we defer */ + if (test_bit(TTY_LDISC_CHANGING, tty-flags)) { + schedule_delayed_work(buf-work, 1); + goto out; + } + if (!test_and_set_bit(TTYP_FLUSHING, port-iflags)) { struct tty_buffer *head; while ((head = buf-head) != NULL) { @@ -522,8 +529,11 @@ static void flush_to_ldisc(struct work_struct *work) if (test_bit(TTYP_FLUSHPENDING, port-iflags)) { __tty_buffer_flush(port); clear_bit(TTYP_FLUSHPENDING, port-iflags); - wake_up(tty-read_wait); } + + /* wake up tasks waiting for TTYP_FLUSHING or TTYP_FLUSHPENDING */ + wake_up_all(tty-read_wait); +out: spin_unlock_irqrestore(buf-lock, flags); tty_ldisc_deref(disc); diff --git a/drivers/tty/tty_ioctl.c b/drivers/tty/tty_ioctl.c index 8481b29..36a1bfa 100644 --- a/drivers/tty/tty_ioctl.c +++ b/drivers/tty/tty_ioctl.c @@ -546,8 +546,33 @@ int tty_set_termios(struct tty_struct *tty, struct ktermios *new_termios) ld = tty_ldisc_ref(tty); if (ld != NULL) { - if (ld-ops-set_termios) + unsigned long flags; + struct tty_port *port = tty-port; + struct tty_bufhead *buf = port-buf; + if (!ld-ops-set_termios) + goto no_set_termios; + + /* Wait if the data is being pushed to the tty layer */ + spin_lock_irqsave(buf-lock, flags); + while (test_bit(TTYP_FLUSHING, port-iflags)) { + spin_unlock_irqrestore(buf-lock, flags); + printk(KERN_CRIT %s flushing\n, __FUNCTION__); + wait_event(tty-read_wait, + test_bit(TTYP_FLUSHING, port-iflags) == 0); + spin_lock_irqsave(buf-lock, flags); + } + printk(KERN_CRIT %s survived flushing\n, __FUNCTION__); + + /* Prevent future flush_to_ldisc while we are setting */ + if (!test_and_set_bit(TTY_LDISC_CHANGING, tty-flags)) { + spin_unlock_irqrestore(buf-lock, flags); (ld-ops-set_termios)(tty, old_termios); + spin_lock_irqsave(buf-lock, flags); + clear_bit(TTY_LDISC_CHANGING, tty-flags); + } + spin_unlock_irqrestore(buf-lock, flags); + +no_set_termios: tty_ldisc_deref(ld); } mutex_unlock(tty-termios_mutex); -- 1.7.7.4 -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCHv2] serial: 8250 check iir rdi in interrupt
The patch works around two UART interrupt bugs when the serial console is flooded with inputs: 1. syslog shows "serial8250: too much works for irq" 2. serial console stops responding to key stroke serial8250_handle_irq() checks UART_IIR_RDI before reading receive fifo and clears bogus interrupt UART_IIR_RDI without accompanying UART_LSR_DR, otherwise RDI interrupt could freeze or too many unhandled RDI interrupts. Added module parameter skip_rdi_check to opt out this workaround. Tested on Radisys ATCA 46XX which uses FPGA 16550-compatible and other generic 16550 UART. It takes from an hour to days to reproduce by pumping inputs to serial console continously using TeraTerm script: Signed-off-by: Min Zhang --- drivers/tty/serial/8250/8250.c | 50 1 files changed, 50 insertions(+), 0 deletions(-) diff --git a/drivers/tty/serial/8250/8250.c b/drivers/tty/serial/8250/8250.c index 3ba4234..dfc13d1 100644 --- a/drivers/tty/serial/8250/8250.c +++ b/drivers/tty/serial/8250/8250.c @@ -64,6 +64,7 @@ static int serial_index(struct uart_port *port) } static unsigned int skip_txen_test; /* force skip of txen test at init time */ +static unsigned int skip_rdi_check = 1; /* skip of IIR RDI check in interrupt */ /* * Debugging. @@ -1479,6 +1480,46 @@ unsigned int serial8250_modem_status(struct uart_8250_port *up) EXPORT_SYMBOL_GPL(serial8250_modem_status); /* + * Check if status UART_LSR_RD accompanies with interrupt UART_IIR_RDI. + * If they are mismatch, massage the status or interupt cause accordingly: + * + * Return a cleared UART_LSR_RD status if there is no accompanying + * UART_IIR_RDI. Hopefully the new status is used by interrupt handler + * to skip reading receive FIFO. Otherwise some UART controller stops + * generating RDI interrupt after this unnotified FIFO read, until other + * interrupts maybe transmit interrupt reads UART_LSR again. + * + * Or clear interrupt cause UART_IIR_RDI without UART_LSR_RD. The UART sets + * UART_IIR_RDI *even* if the received data has been read out from the FIFO + * before the timeout occurs. To clear UART_IIR_RDI, read receive buffer + * register. Reading it also clears timeout interrupt for 16550+. Otherwise + * the uncleared UART_IIR_RDI will keep triggering IRQ but interrupt + * handler finds nothing to do. + * + * Skip this workaround if interrupt is not expected, such as backup timer, + * so that handler can still solely rely on original status register. + */ +static inline unsigned char serial8250_iir_rdi_check(struct uart_8250_port *up, +unsigned char status, +unsigned int iir) +{ + unsigned int rdi_stat, rdi_intr; + + /* skip for timer based handler */ + if (up->timer.data) + return status; + + rdi_stat = status & UART_LSR_DR; + rdi_intr = iir & UART_IIR_RDI; + + if (rdi_stat && !rdi_intr) + status &= ~UART_LSR_DR; + else if (!rdi_stat && rdi_intr) + serial_in(up, UART_RX); + return status; +} + +/* * This handles the interrupt from one port. */ int serial8250_handle_irq(struct uart_port *port, unsigned int iir) @@ -1497,6 +1538,12 @@ int serial8250_handle_irq(struct uart_port *port, unsigned int iir) DEBUG_INTR("status = %x...", status); + /* Some UART controller has mismatched UART_IIR_RDI and UART_LSR_DR, + which causes either too many interrupts or interrupt freeze +*/ + if (!skip_rdi_check) + status = serial8250_iir_rdi_check(up, status, iir); + if (status & (UART_LSR_DR | UART_LSR_BI)) status = serial8250_rx_chars(up, status); serial8250_modem_status(up); @@ -3338,6 +3385,9 @@ MODULE_PARM_DESC(nr_uarts, "Maximum number of UARTs supported. (1-" __MODULE_STR module_param(skip_txen_test, uint, 0644); MODULE_PARM_DESC(skip_txen_test, "Skip checking for the TXEN bug at init time"); +module_param(skip_rdi_check, uint, 0644); +MODULE_PARM_DESC(skip_rdi_check, "Skip checking IIR RDI bug in interrupt"); + #ifdef CONFIG_SERIAL_8250_RSA module_param_array(probe_rsa, ulong, _rsa_count, 0444); MODULE_PARM_DESC(probe_rsa, "Probe I/O ports for RSA"); -- 1.7.0.1 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] serial: 8250 check iir rdi in interrupt
On Fri, Oct 26, 2012 at 7:19 AM, Alan Cox wrote: > So we only need to check this in serial8250_handle_irq when IIR indicates > a data timeout interrupt ? > > Can we do > > if ((iir & 0x0F) == 0x0C) { > /* Expensive RDI check */ > } > > > > Alan Checking data timeout interrupt is only for "too much work for irq" problem. There is another console freeze problem which is caused by reading receive FIFO when there is no RDI interrupt, such as during THRI transmit interrupt, so no timeout interrupt. This time one has to check two both IIR and LSR. After all these checking, it becomes the original patch anyway. I am posting another simpler revision to exclude timer handler from this workaround, and make this workaround default off and inline. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] serial: 8250 check iir rdi in interrupt
On Fri, Oct 26, 2012 at 7:19 AM, Alan Cox a...@lxorguk.ukuu.org.uk wrote: So we only need to check this in serial8250_handle_irq when IIR indicates a data timeout interrupt ? Can we do if ((iir 0x0F) == 0x0C) { /* Expensive RDI check */ } Alan Checking data timeout interrupt is only for too much work for irq problem. There is another console freeze problem which is caused by reading receive FIFO when there is no RDI interrupt, such as during THRI transmit interrupt, so no timeout interrupt. This time one has to check two both IIR and LSR. After all these checking, it becomes the original patch anyway. I am posting another simpler revision to exclude timer handler from this workaround, and make this workaround default off and inline. -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCHv2] serial: 8250 check iir rdi in interrupt
The patch works around two UART interrupt bugs when the serial console is flooded with inputs: 1. syslog shows serial8250: too much works for irq 2. serial console stops responding to key stroke serial8250_handle_irq() checks UART_IIR_RDI before reading receive fifo and clears bogus interrupt UART_IIR_RDI without accompanying UART_LSR_DR, otherwise RDI interrupt could freeze or too many unhandled RDI interrupts. Added module parameter skip_rdi_check to opt out this workaround. Tested on Radisys ATCA 46XX which uses FPGA 16550-compatible and other generic 16550 UART. It takes from an hour to days to reproduce by pumping inputs to serial console continously using TeraTerm script: Signed-off-by: Min Zhang mzh...@mvista.com --- drivers/tty/serial/8250/8250.c | 50 1 files changed, 50 insertions(+), 0 deletions(-) diff --git a/drivers/tty/serial/8250/8250.c b/drivers/tty/serial/8250/8250.c index 3ba4234..dfc13d1 100644 --- a/drivers/tty/serial/8250/8250.c +++ b/drivers/tty/serial/8250/8250.c @@ -64,6 +64,7 @@ static int serial_index(struct uart_port *port) } static unsigned int skip_txen_test; /* force skip of txen test at init time */ +static unsigned int skip_rdi_check = 1; /* skip of IIR RDI check in interrupt */ /* * Debugging. @@ -1479,6 +1480,46 @@ unsigned int serial8250_modem_status(struct uart_8250_port *up) EXPORT_SYMBOL_GPL(serial8250_modem_status); /* + * Check if status UART_LSR_RD accompanies with interrupt UART_IIR_RDI. + * If they are mismatch, massage the status or interupt cause accordingly: + * + * Return a cleared UART_LSR_RD status if there is no accompanying + * UART_IIR_RDI. Hopefully the new status is used by interrupt handler + * to skip reading receive FIFO. Otherwise some UART controller stops + * generating RDI interrupt after this unnotified FIFO read, until other + * interrupts maybe transmit interrupt reads UART_LSR again. + * + * Or clear interrupt cause UART_IIR_RDI without UART_LSR_RD. The UART sets + * UART_IIR_RDI *even* if the received data has been read out from the FIFO + * before the timeout occurs. To clear UART_IIR_RDI, read receive buffer + * register. Reading it also clears timeout interrupt for 16550+. Otherwise + * the uncleared UART_IIR_RDI will keep triggering IRQ but interrupt + * handler finds nothing to do. + * + * Skip this workaround if interrupt is not expected, such as backup timer, + * so that handler can still solely rely on original status register. + */ +static inline unsigned char serial8250_iir_rdi_check(struct uart_8250_port *up, +unsigned char status, +unsigned int iir) +{ + unsigned int rdi_stat, rdi_intr; + + /* skip for timer based handler */ + if (up-timer.data) + return status; + + rdi_stat = status UART_LSR_DR; + rdi_intr = iir UART_IIR_RDI; + + if (rdi_stat !rdi_intr) + status = ~UART_LSR_DR; + else if (!rdi_stat rdi_intr) + serial_in(up, UART_RX); + return status; +} + +/* * This handles the interrupt from one port. */ int serial8250_handle_irq(struct uart_port *port, unsigned int iir) @@ -1497,6 +1538,12 @@ int serial8250_handle_irq(struct uart_port *port, unsigned int iir) DEBUG_INTR(status = %x..., status); + /* Some UART controller has mismatched UART_IIR_RDI and UART_LSR_DR, + which causes either too many interrupts or interrupt freeze +*/ + if (!skip_rdi_check) + status = serial8250_iir_rdi_check(up, status, iir); + if (status (UART_LSR_DR | UART_LSR_BI)) status = serial8250_rx_chars(up, status); serial8250_modem_status(up); @@ -3338,6 +3385,9 @@ MODULE_PARM_DESC(nr_uarts, Maximum number of UARTs supported. (1- __MODULE_STR module_param(skip_txen_test, uint, 0644); MODULE_PARM_DESC(skip_txen_test, Skip checking for the TXEN bug at init time); +module_param(skip_rdi_check, uint, 0644); +MODULE_PARM_DESC(skip_rdi_check, Skip checking IIR RDI bug in interrupt); + #ifdef CONFIG_SERIAL_8250_RSA module_param_array(probe_rsa, ulong, probe_rsa_count, 0444); MODULE_PARM_DESC(probe_rsa, Probe I/O ports for RSA); -- 1.7.0.1 -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] serial: 8250 check iir rdi in interrupt
On Tue, Oct 23, 2012 at 3:01 AM, Alan Cox wrote: > > > Added module parameter skip_rdi_check to opt out this workaround. > > NAK. Anything like this should be runtime. One can echo 1 (or 0) > /sys/modules/8250/parameters/skip_rdi_check during run time to turn it off (or on) dynamically. Does it count as runtime? > > Tested on Radisys ATCA 46XX which uses FPGA 16550-compatible and > > other generic 16550 UART. It takes from an hour to days to reproduce by > > pumping inputs to serial console continously using TeraTerm script: > > You turn this on by default but it's a nasty IRQ latency penalty > on a lot of x86 platforms with the uarts on the lpc bus. I agree. Will this patch be more acceptable if default is off? I can't narrow it hardware down since it is all generic UART., > What I am not clear on from this is > > - do you see it on both the ports (the bug that is) No, each hardware only has one serial console port that has traffic, and only one of the two symptom occur on one type of hardware. That is hardware 1 ttyS0 has "too much work for irq", and hardware 2 ttyS0 has console freeze under a separate test. I group them together since they occur using the same console flooding test script and under similar RDI root cause. > - if you do see it on both are you sure its not in reality a symptom of > some other console/irq handling race ? It is racing. For "too much work for irq", here is sequence events analyzed by a Motorola engineer: 1) Data arrives in the FIFO, but not enough to cause an interrupt 2) The transmitter is started. 3) A transmit needs data interrupt occurs (0xC2 in the IIR) 4) The processing function is called and it reads the LSR 5) The LSR indicates that the transmitter needs data, but also indicates the presence of data in the FIFO (0x61 in the LSR) 6) The processing function receives the characters, and outputs data to the FIFO 7) At the exact time (very very small window) that the character is read from the FIFO, the FIFO timeout occurs locking in an interrupt cause 8) The next loop through the interrupt code begins 9) The IIR now indicates the data timeout interrupt (0xCC in the IIR) 10) The processing function is called and it reads the LSR 11) The LSR is 0 indicating nothing to do 12) The interrupt loop continues (the IIR won't clear until a character is pulled) until it reaches its max count and displays the error. The other console freeze symptom is caused by similar sequence. The last interrupt before interrupt stops always shows IIR=0xC2 and LSR=0x21, which means has transmit interrupt but both transmit and receive status. After interrupt stops, i insmod a module to force read: IIR=0xC6, IER=0x0F, still no interrupt. Then I read LSR=0xE3., which is what the next interrupt would have done, makes interrupt resume again. Instead of force reading LSR, I can also resume interrupt by forcing a printk, which triggers a new transmit interrupt that reads LSR anyway. > > Alan -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] serial: 8250 check iir rdi in interrupt
On Tue, Oct 23, 2012 at 3:01 AM, Alan Cox a...@lxorguk.ukuu.org.uk wrote: Added module parameter skip_rdi_check to opt out this workaround. NAK. Anything like this should be runtime. One can echo 1 (or 0) /sys/modules/8250/parameters/skip_rdi_check during run time to turn it off (or on) dynamically. Does it count as runtime? Tested on Radisys ATCA 46XX which uses FPGA 16550-compatible and other generic 16550 UART. It takes from an hour to days to reproduce by pumping inputs to serial console continously using TeraTerm script: You turn this on by default but it's a nasty IRQ latency penalty on a lot of x86 platforms with the uarts on the lpc bus. I agree. Will this patch be more acceptable if default is off? I can't narrow it hardware down since it is all generic UART., What I am not clear on from this is - do you see it on both the ports (the bug that is) No, each hardware only has one serial console port that has traffic, and only one of the two symptom occur on one type of hardware. That is hardware 1 ttyS0 has too much work for irq, and hardware 2 ttyS0 has console freeze under a separate test. I group them together since they occur using the same console flooding test script and under similar RDI root cause. - if you do see it on both are you sure its not in reality a symptom of some other console/irq handling race ? It is racing. For too much work for irq, here is sequence events analyzed by a Motorola engineer: 1) Data arrives in the FIFO, but not enough to cause an interrupt 2) The transmitter is started. 3) A transmit needs data interrupt occurs (0xC2 in the IIR) 4) The processing function is called and it reads the LSR 5) The LSR indicates that the transmitter needs data, but also indicates the presence of data in the FIFO (0x61 in the LSR) 6) The processing function receives the characters, and outputs data to the FIFO 7) At the exact time (very very small window) that the character is read from the FIFO, the FIFO timeout occurs locking in an interrupt cause 8) The next loop through the interrupt code begins 9) The IIR now indicates the data timeout interrupt (0xCC in the IIR) 10) The processing function is called and it reads the LSR 11) The LSR is 0 indicating nothing to do 12) The interrupt loop continues (the IIR won't clear until a character is pulled) until it reaches its max count and displays the error. The other console freeze symptom is caused by similar sequence. The last interrupt before interrupt stops always shows IIR=0xC2 and LSR=0x21, which means has transmit interrupt but both transmit and receive status. After interrupt stops, i insmod a module to force read: IIR=0xC6, IER=0x0F, still no interrupt. Then I read LSR=0xE3., which is what the next interrupt would have done, makes interrupt resume again. Instead of force reading LSR, I can also resume interrupt by forcing a printk, which triggers a new transmit interrupt that reads LSR anyway. Alan -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] serial: 8250 check iir rdi in interrupt
The patch works around two UART interrupt bugs when the serial console is flooded with inputs: 1. syslog shows "serial8250: too much works for irq" 2. serial console stops responding to key stroke serial8250_handle_irq() checks UART_IIR_RDI before reading receive fifo and clears bogus interrupt UART_IIR_RDI without accompanying UART_LSR_DR, otherwise RDI interrupt could freeze or too many unhandled RDI interrupts. Added module parameter skip_rdi_check to opt out this workaround. Tested on Radisys ATCA 46XX which uses FPGA 16550-compatible and other generic 16550 UART. It takes from an hour to days to reproduce by pumping inputs to serial console continously using TeraTerm script: --- drivers/tty/serial/8250/8250.c | 55 1 files changed, 55 insertions(+), 0 deletions(-) diff --git a/drivers/tty/serial/8250/8250.c b/drivers/tty/serial/8250/8250.c index 3ba4234..838dd22 100644 --- a/drivers/tty/serial/8250/8250.c +++ b/drivers/tty/serial/8250/8250.c @@ -64,6 +64,7 @@ static int serial_index(struct uart_port *port) } static unsigned int skip_txen_test; /* force skip of txen test at init time */ +static unsigned int skip_rdi_check; /* skip of IIR RDI check in interrupt */ /* * Debugging. @@ -1479,6 +1480,51 @@ unsigned int serial8250_modem_status(struct uart_8250_port *up) EXPORT_SYMBOL_GPL(serial8250_modem_status); /* + * Check if status UART_LSR_RD accompanies with interrupt UART_IIR_RDI. + * If they are mismatch, massage the status or interupt cause accordingly: + * + * Return a cleared UART_LSR_RD status if there is no accompanying + * UART_IIR_RDI. Hopefully the new status is used by interrupt handler + * to skip reading receive FIFO. Otherwise some UART controller stops + * generating RDI interrupt after this unnotified FIFO read, until other + * interrupts maybe transmit interrupt reads UART_LSR again. + * + * Or clear interrupt cause UART_IIR_RDI without UART_LSR_RD. The UART sets + * UART_IIR_RDI *even* if the received data has been read out from the FIFO + * before the timeout occurs. To clear UART_IIR_RDI, read receive buffer + * register. Reading it also clears timeout interrupt for 16550+. Otherwise + * the uncleared UART_IIR_RDI will keep triggering IRQ but interrupt + * handler finds nothing to do. + * + * Skip this workaround if interrupt is not expected, such as backup timer, + * so that handler can still solely rely on original status register. + */ +static unsigned char serial8250_iir_rdi_check(struct uart_8250_port *up, +unsigned char status, +unsigned int iir) +{ + unsigned int ier, rdi_stat, rdi_intr; + + /* skip for handler without interrupt */ + if (!up->port.irq) + return status; + + /* skip for polling handler such as backup timer */ + ier = serial_in(up, UART_IER); + if (!(ier & UART_IER_RDI)) + return status; + + rdi_stat = status & UART_LSR_DR; + rdi_intr = iir & UART_IIR_RDI; + + if (rdi_stat && !rdi_intr) + status &= ~UART_LSR_DR; + else if (!rdi_stat && rdi_intr) + serial_in(up, UART_RX); + return status; +} + +/* * This handles the interrupt from one port. */ int serial8250_handle_irq(struct uart_port *port, unsigned int iir) @@ -1497,6 +1543,12 @@ int serial8250_handle_irq(struct uart_port *port, unsigned int iir) DEBUG_INTR("status = %x...", status); + /* Some UART controller has mismatched UART_IIR_RDI and UART_LSR_DR, + which causes either too many interrupts or interrupt freeze +*/ + if (!skip_rdi_check) + status = serial8250_iir_rdi_check(up, status, iir); + if (status & (UART_LSR_DR | UART_LSR_BI)) status = serial8250_rx_chars(up, status); serial8250_modem_status(up); @@ -3338,6 +3390,9 @@ MODULE_PARM_DESC(nr_uarts, "Maximum number of UARTs supported. (1-" __MODULE_STR module_param(skip_txen_test, uint, 0644); MODULE_PARM_DESC(skip_txen_test, "Skip checking for the TXEN bug at init time"); +module_param(skip_rdi_check, uint, 0644); +MODULE_PARM_DESC(skip_rdi_check, "Skip checking IIR RDI bug in interrupt"); + #ifdef CONFIG_SERIAL_8250_RSA module_param_array(probe_rsa, ulong, _rsa_count, 0444); MODULE_PARM_DESC(probe_rsa, "Probe I/O ports for RSA"); -- 1.7.7.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] serial: 8250 check iir rdi in interrupt
The patch works around two UART interrupt bugs when the serial console is flooded with inputs: 1. syslog shows serial8250: too much works for irq 2. serial console stops responding to key stroke serial8250_handle_irq() checks UART_IIR_RDI before reading receive fifo and clears bogus interrupt UART_IIR_RDI without accompanying UART_LSR_DR, otherwise RDI interrupt could freeze or too many unhandled RDI interrupts. Added module parameter skip_rdi_check to opt out this workaround. Tested on Radisys ATCA 46XX which uses FPGA 16550-compatible and other generic 16550 UART. It takes from an hour to days to reproduce by pumping inputs to serial console continously using TeraTerm script: --- drivers/tty/serial/8250/8250.c | 55 1 files changed, 55 insertions(+), 0 deletions(-) diff --git a/drivers/tty/serial/8250/8250.c b/drivers/tty/serial/8250/8250.c index 3ba4234..838dd22 100644 --- a/drivers/tty/serial/8250/8250.c +++ b/drivers/tty/serial/8250/8250.c @@ -64,6 +64,7 @@ static int serial_index(struct uart_port *port) } static unsigned int skip_txen_test; /* force skip of txen test at init time */ +static unsigned int skip_rdi_check; /* skip of IIR RDI check in interrupt */ /* * Debugging. @@ -1479,6 +1480,51 @@ unsigned int serial8250_modem_status(struct uart_8250_port *up) EXPORT_SYMBOL_GPL(serial8250_modem_status); /* + * Check if status UART_LSR_RD accompanies with interrupt UART_IIR_RDI. + * If they are mismatch, massage the status or interupt cause accordingly: + * + * Return a cleared UART_LSR_RD status if there is no accompanying + * UART_IIR_RDI. Hopefully the new status is used by interrupt handler + * to skip reading receive FIFO. Otherwise some UART controller stops + * generating RDI interrupt after this unnotified FIFO read, until other + * interrupts maybe transmit interrupt reads UART_LSR again. + * + * Or clear interrupt cause UART_IIR_RDI without UART_LSR_RD. The UART sets + * UART_IIR_RDI *even* if the received data has been read out from the FIFO + * before the timeout occurs. To clear UART_IIR_RDI, read receive buffer + * register. Reading it also clears timeout interrupt for 16550+. Otherwise + * the uncleared UART_IIR_RDI will keep triggering IRQ but interrupt + * handler finds nothing to do. + * + * Skip this workaround if interrupt is not expected, such as backup timer, + * so that handler can still solely rely on original status register. + */ +static unsigned char serial8250_iir_rdi_check(struct uart_8250_port *up, +unsigned char status, +unsigned int iir) +{ + unsigned int ier, rdi_stat, rdi_intr; + + /* skip for handler without interrupt */ + if (!up-port.irq) + return status; + + /* skip for polling handler such as backup timer */ + ier = serial_in(up, UART_IER); + if (!(ier UART_IER_RDI)) + return status; + + rdi_stat = status UART_LSR_DR; + rdi_intr = iir UART_IIR_RDI; + + if (rdi_stat !rdi_intr) + status = ~UART_LSR_DR; + else if (!rdi_stat rdi_intr) + serial_in(up, UART_RX); + return status; +} + +/* * This handles the interrupt from one port. */ int serial8250_handle_irq(struct uart_port *port, unsigned int iir) @@ -1497,6 +1543,12 @@ int serial8250_handle_irq(struct uart_port *port, unsigned int iir) DEBUG_INTR(status = %x..., status); + /* Some UART controller has mismatched UART_IIR_RDI and UART_LSR_DR, + which causes either too many interrupts or interrupt freeze +*/ + if (!skip_rdi_check) + status = serial8250_iir_rdi_check(up, status, iir); + if (status (UART_LSR_DR | UART_LSR_BI)) status = serial8250_rx_chars(up, status); serial8250_modem_status(up); @@ -3338,6 +3390,9 @@ MODULE_PARM_DESC(nr_uarts, Maximum number of UARTs supported. (1- __MODULE_STR module_param(skip_txen_test, uint, 0644); MODULE_PARM_DESC(skip_txen_test, Skip checking for the TXEN bug at init time); +module_param(skip_rdi_check, uint, 0644); +MODULE_PARM_DESC(skip_rdi_check, Skip checking IIR RDI bug in interrupt); + #ifdef CONFIG_SERIAL_8250_RSA module_param_array(probe_rsa, ulong, probe_rsa_count, 0444); MODULE_PARM_DESC(probe_rsa, Probe I/O ports for RSA); -- 1.7.7.4 -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] mlx4: set carrier off after register netdev
On Fri, 5 Oct 2012, Ben Hutchings wrote: > On Fri, 2012-10-05 at 11:28 -0700, Min Zhang wrote: > > ifconfig mlx4_en port reported RUNNING even though the link was down. > > > > mlx4_en_init_netdev didn't initialize the dev operstate properly so > > the operstate stayed as default IF_OPER_UNKNOWN, then ifconfig treated > > the UNKNOWN as RUNNING state for backward compatiblity per RFC2863. > > > > The fix calls netif_carrier_off which is supposed to set operstate > > after register_netdev. Calling it before register_netdev has no effect > > since the dev->state is still NETREG_UNINITIALIZED > > > > Tested by removing the physical link signal to the mellanox 10G port, > > modprobe mlx4_en, then ifconfig up. Verify there is no RUNNING status. > [...] > > This was supposed to be fixed by: > > commit 8f4cccbbd92f2ad0ddbbc498ef7cee2a1c3defe9 > Author: Ben Hutchings > Date: Mon Aug 20 22:16:51 2012 +0100 > > net: Set device operstate at registration time > > Does that not work for mlx4_en, for some reason? > > Ben. > > -- > Ben Hutchings, Staff Engineer, Solarflare > Not speaking for my employer; that's the marketing department's job. > They asked us to note that Solarflare product names are trademarked. > Indeed, commit 8f4cccbbd92f2ad0ddbbc498ef7cee2a1c3defe9 does fix the issue in mlx4. I didn't have the newest net/core. Therefore ignore my mlx4 fix. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] mlx4: set carrier off after register netdev
ifconfig mlx4_en port reported RUNNING even though the link was down. mlx4_en_init_netdev didn't initialize the dev operstate properly so the operstate stayed as default IF_OPER_UNKNOWN, then ifconfig treated the UNKNOWN as RUNNING state for backward compatiblity per RFC2863. The fix calls netif_carrier_off which is supposed to set operstate after register_netdev. Calling it before register_netdev has no effect since the dev->state is still NETREG_UNINITIALIZED Tested by removing the physical link signal to the mellanox 10G port, modprobe mlx4_en, then ifconfig up. Verify there is no RUNNING status. Signed-off-by: Min Zhang --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index c96113b..8c562f7a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1306,12 +1306,12 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, mdev->pndev[port] = dev; - netif_carrier_off(dev); err = register_netdev(dev); if (err) { mlx4_err(mdev, "Netdev registration failed for port %d\n", port); goto out; } + netif_carrier_off(dev); en_warn(priv, "Using %d TX rings\n", prof->tx_ring_num); en_warn(priv, "Using %d RX rings\n", prof->rx_ring_num); -- 1.7.7.4 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] mlx4: set carrier off after register netdev
ifconfig mlx4_en port reported RUNNING even though the link was down. mlx4_en_init_netdev didn't initialize the dev operstate properly so the operstate stayed as default IF_OPER_UNKNOWN, then ifconfig treated the UNKNOWN as RUNNING state for backward compatiblity per RFC2863. The fix calls netif_carrier_off which is supposed to set operstate after register_netdev. Calling it before register_netdev has no effect since the dev-state is still NETREG_UNINITIALIZED Tested by removing the physical link signal to the mellanox 10G port, modprobe mlx4_en, then ifconfig up. Verify there is no RUNNING status. Signed-off-by: Min Zhang mzh...@mvista.com --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index c96113b..8c562f7a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1306,12 +1306,12 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, mdev-pndev[port] = dev; - netif_carrier_off(dev); err = register_netdev(dev); if (err) { mlx4_err(mdev, Netdev registration failed for port %d\n, port); goto out; } + netif_carrier_off(dev); en_warn(priv, Using %d TX rings\n, prof-tx_ring_num); en_warn(priv, Using %d RX rings\n, prof-rx_ring_num); -- 1.7.7.4 -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
Re: [PATCH] mlx4: set carrier off after register netdev
On Fri, 5 Oct 2012, Ben Hutchings wrote: On Fri, 2012-10-05 at 11:28 -0700, Min Zhang wrote: ifconfig mlx4_en port reported RUNNING even though the link was down. mlx4_en_init_netdev didn't initialize the dev operstate properly so the operstate stayed as default IF_OPER_UNKNOWN, then ifconfig treated the UNKNOWN as RUNNING state for backward compatiblity per RFC2863. The fix calls netif_carrier_off which is supposed to set operstate after register_netdev. Calling it before register_netdev has no effect since the dev-state is still NETREG_UNINITIALIZED Tested by removing the physical link signal to the mellanox 10G port, modprobe mlx4_en, then ifconfig up. Verify there is no RUNNING status. [...] This was supposed to be fixed by: commit 8f4cccbbd92f2ad0ddbbc498ef7cee2a1c3defe9 Author: Ben Hutchings bhutchi...@solarflare.com Date: Mon Aug 20 22:16:51 2012 +0100 net: Set device operstate at registration time Does that not work for mlx4_en, for some reason? Ben. -- Ben Hutchings, Staff Engineer, Solarflare Not speaking for my employer; that's the marketing department's job. They asked us to note that Solarflare product names are trademarked. Indeed, commit 8f4cccbbd92f2ad0ddbbc498ef7cee2a1c3defe9 does fix the issue in mlx4. I didn't have the newest net/core. Therefore ignore my mlx4 fix. -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 1/2] Preempt realtime Oops syslog
Add bust_spinlocks when kernel die from do_trap to flush Oops dump to syslog immediatly. Oops is missing from syslog if it is from non-preemptible section, because on PREEMPT_RT kernel, Oops printk doesn't wake up klogd from non-preemptible context; see release_console_sem in kernel/printk.c. Tested by Oops in module_init and insmod that kernel module. The Oops is in non-preemptive context because modules are loaded from inside a realtime priority thread in a preemptive realtime kernel. Only apply to PPC arch because bust_spinlocks has already been used by x86 and other architectures Signed-off-by: Min Zhang <[EMAIL PROTECTED]> Index: rt-2.6/arch/ppc/kernel/traps.c === --- rt-2.6.orig/arch/ppc/kernel/traps.c +++ rt-2.6/arch/ppc/kernel/traps.c @@ -92,6 +92,7 @@ int die(const char * str, struct pt_regs if (nl) printk("\n"); show_regs(fp); + bust_spinlocks(0); add_taint(TAINT_DIE); spin_unlock_irq(_lock); /* do_exit() should take care of panic'ing from an interrupt -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 2/2] Preempt realtime printk syslog
Add 50s timeout in do_syslog to poll printk from non-preemptible section. printk is missing from syslog if from non-preemptible section, because on PREEMPT_RT kernels, printk doesn't wake up syslogd from non-preemptible section; see release_console_sem in kernel/printk.c. Tested by printk in module_init and insmod that kernel module. The printk is in non-preemptive context because modules are loaded from inside a realtime priority thread in a preemptive realtime kernel. Signed-off-by: Min Zhang <[EMAIL PROTECTED]> Index: rt-2.6/kernel/printk.c === --- rt-2.6.orig/kernel/printk.c +++ rt-2.6/kernel/printk.c @@ -116,6 +116,12 @@ static int preferred_console = -1; /* Flag: console code may call schedule() */ static int console_may_schedule; +/* minimum time in jiffies between messages */ +int printk_ratelimit_jiffies = 5 * HZ; + +/* number of messages we send before ratelimiting */ +int printk_ratelimit_burst = 10; + #ifdef CONFIG_PRINTK static char __log_buf[__LOG_BUF_LEN]; @@ -313,10 +319,26 @@ int do_syslog(int type, char __user *buf error = -EFAULT; goto out; } +#ifdef CONFIG_PREEMPT_RT + /* +* On PREEMPT_RT kernels release_console_sem wakes us only if +* in preemptible section, so timeout to poll for printk from +* non-preemptible sections. +*/ + i = printk_ratelimit_burst * printk_ratelimit_jiffies; + error = wait_event_interruptible_timeout(log_wait, +(log_start - log_end), +i); + if (error < 0) + goto out; + else + error = 0; +#else error = wait_event_interruptible(log_wait, (log_start - log_end)); if (error) goto out; +#endif i = 0; spin_lock_irq(_lock); while (!error && (log_start != log_end) && i < len) { @@ -1272,12 +1294,6 @@ int __printk_ratelimit(int ratelimit_jif } EXPORT_SYMBOL(__printk_ratelimit); -/* minimum time in jiffies between messages */ -int printk_ratelimit_jiffies = 5 * HZ; - -/* number of messages we send before ratelimiting */ -int printk_ratelimit_burst = 10; - int printk_ratelimit(void) { return __printk_ratelimit(printk_ratelimit_jiffies, -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 1/2] Preempt realtime Oops syslog
Add bust_spinlocks when kernel die from do_trap to flush Oops dump to syslog immediatly. Oops is missing from syslog if it is from non-preemptible section, because on PREEMPT_RT kernel, Oops printk doesn't wake up klogd from non-preemptible context; see release_console_sem in kernel/printk.c. Tested by Oops in module_init and insmod that kernel module. The Oops is in non-preemptive context because modules are loaded from inside a realtime priority thread in a preemptive realtime kernel. Only apply to PPC arch because bust_spinlocks has already been used by x86 and other architectures Signed-off-by: Min Zhang [EMAIL PROTECTED] Index: rt-2.6/arch/ppc/kernel/traps.c === --- rt-2.6.orig/arch/ppc/kernel/traps.c +++ rt-2.6/arch/ppc/kernel/traps.c @@ -92,6 +92,7 @@ int die(const char * str, struct pt_regs if (nl) printk(\n); show_regs(fp); + bust_spinlocks(0); add_taint(TAINT_DIE); spin_unlock_irq(die_lock); /* do_exit() should take care of panic'ing from an interrupt -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH 2/2] Preempt realtime printk syslog
Add 50s timeout in do_syslog to poll printk from non-preemptible section. printk is missing from syslog if from non-preemptible section, because on PREEMPT_RT kernels, printk doesn't wake up syslogd from non-preemptible section; see release_console_sem in kernel/printk.c. Tested by printk in module_init and insmod that kernel module. The printk is in non-preemptive context because modules are loaded from inside a realtime priority thread in a preemptive realtime kernel. Signed-off-by: Min Zhang [EMAIL PROTECTED] Index: rt-2.6/kernel/printk.c === --- rt-2.6.orig/kernel/printk.c +++ rt-2.6/kernel/printk.c @@ -116,6 +116,12 @@ static int preferred_console = -1; /* Flag: console code may call schedule() */ static int console_may_schedule; +/* minimum time in jiffies between messages */ +int printk_ratelimit_jiffies = 5 * HZ; + +/* number of messages we send before ratelimiting */ +int printk_ratelimit_burst = 10; + #ifdef CONFIG_PRINTK static char __log_buf[__LOG_BUF_LEN]; @@ -313,10 +319,26 @@ int do_syslog(int type, char __user *buf error = -EFAULT; goto out; } +#ifdef CONFIG_PREEMPT_RT + /* +* On PREEMPT_RT kernels release_console_sem wakes us only if +* in preemptible section, so timeout to poll for printk from +* non-preemptible sections. +*/ + i = printk_ratelimit_burst * printk_ratelimit_jiffies; + error = wait_event_interruptible_timeout(log_wait, +(log_start - log_end), +i); + if (error 0) + goto out; + else + error = 0; +#else error = wait_event_interruptible(log_wait, (log_start - log_end)); if (error) goto out; +#endif i = 0; spin_lock_irq(logbuf_lock); while (!error (log_start != log_end) i len) { @@ -1272,12 +1294,6 @@ int __printk_ratelimit(int ratelimit_jif } EXPORT_SYMBOL(__printk_ratelimit); -/* minimum time in jiffies between messages */ -int printk_ratelimit_jiffies = 5 * HZ; - -/* number of messages we send before ratelimiting */ -int printk_ratelimit_burst = 10; - int printk_ratelimit(void) { return __printk_ratelimit(printk_ratelimit_jiffies, -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] arch/x86/kernel/cpu/mcheck/p4.c, kernel 2.6.24-rc5
Consolidate printk and insert CPU id to cleanup SMP interleaved output. In SMP, the machine check exception dispatches all logical processors within a physical package to the machine-check exception handler, so the printk within each handler outputs concurrently and makes the output unreadable. Refer to Intel system programming guide Part 1 Section 7.8.5 http://developer.intel.com/design/processor/manuals/253668.pdf Signed-off-by: Min Zhang <[EMAIL PROTECTED]> Index: linux-2.6.24-rc5/arch/x86/kernel/cpu/mcheck/p4.c === --- linux-2.6.24-rc5.orig/arch/x86/kernel/cpu/mcheck/p4.c +++ linux-2.6.24-rc5/arch/x86/kernel/cpu/mcheck/p4.c @@ -158,32 +158,35 @@ static fastcall void intel_machine_check if (mce_num_extended_msrs > 0) { struct intel_mce_extended_msrs dbg; intel_get_extended_msrs(); - printk (KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n", - smp_processor_id(), dbg.eip, dbg.eflags); - printk (KERN_DEBUG "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n", - dbg.eax, dbg.ebx, dbg.ecx, dbg.edx); - printk (KERN_DEBUG "\tesi: %08x edi: %08x ebp: %08x esp: %08x\n", + printk (KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n" + "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n" + "\tesi: %08x edi: %08x ebp: %08x esp: %08x\n", + smp_processor_id(), dbg.eip, dbg.eflags, + dbg.eax, dbg.ebx, dbg.ecx, dbg.edx, dbg.esi, dbg.edi, dbg.ebp, dbg.esp); } for (i=0; ihttp://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
[PATCH] arch/x86/kernel/cpu/mcheck/p4.c, kernel 2.6.24-rc5
Consolidate printk and insert CPU id to cleanup SMP interleaved output. In SMP, the machine check exception dispatches all logical processors within a physical package to the machine-check exception handler, so the printk within each handler outputs concurrently and makes the output unreadable. Refer to Intel system programming guide Part 1 Section 7.8.5 http://developer.intel.com/design/processor/manuals/253668.pdf Signed-off-by: Min Zhang [EMAIL PROTECTED] Index: linux-2.6.24-rc5/arch/x86/kernel/cpu/mcheck/p4.c === --- linux-2.6.24-rc5.orig/arch/x86/kernel/cpu/mcheck/p4.c +++ linux-2.6.24-rc5/arch/x86/kernel/cpu/mcheck/p4.c @@ -158,32 +158,35 @@ static fastcall void intel_machine_check if (mce_num_extended_msrs 0) { struct intel_mce_extended_msrs dbg; intel_get_extended_msrs(dbg); - printk (KERN_DEBUG CPU %d: EIP: %08x EFLAGS: %08x\n, - smp_processor_id(), dbg.eip, dbg.eflags); - printk (KERN_DEBUG \teax: %08x ebx: %08x ecx: %08x edx: %08x\n, - dbg.eax, dbg.ebx, dbg.ecx, dbg.edx); - printk (KERN_DEBUG \tesi: %08x edi: %08x ebp: %08x esp: %08x\n, + printk (KERN_DEBUG CPU %d: EIP: %08x EFLAGS: %08x\n + \teax: %08x ebx: %08x ecx: %08x edx: %08x\n + \tesi: %08x edi: %08x ebp: %08x esp: %08x\n, + smp_processor_id(), dbg.eip, dbg.eflags, + dbg.eax, dbg.ebx, dbg.ecx, dbg.edx, dbg.esi, dbg.edi, dbg.ebp, dbg.esp); } for (i=0; inr_mce_banks; i++) { rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high); if (high (131)) { + char misc[20]; + char addr[24]; + misc[0] = addr[0] = '\0'; if (high (129)) recover |= 1; if (high (125)) recover |= 2; - printk (KERN_EMERG Bank %d: %08x%08x, i, high, low); high = ~(131); if (high (127)) { rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh); - printk ([%08x%08x], ahigh, alow); + snprintf (misc, 20, [%08x%08x], ahigh, alow); } if (high (126)) { rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh); - printk ( at %08x%08x, ahigh, alow); + snprintf (addr, 24, at %08x%08x, ahigh, alow); } - printk (\n); + printk (KERN_EMERG CPU %d: Bank %d: %08x%08x%s%s\n, + smp_processor_id(), i, high, low, misc, addr); } } Index: linux-2.6.24-rc5/arch/x86/kernel/cpu/mcheck/p6.c === --- linux-2.6.24-rc5.orig/arch/x86/kernel/cpu/mcheck/p6.c +++ linux-2.6.24-rc5/arch/x86/kernel/cpu/mcheck/p6.c @@ -33,21 +33,24 @@ static fastcall void intel_machine_check for (i=0; inr_mce_banks; i++) { rdmsr (MSR_IA32_MC0_STATUS+i*4,low, high); if (high (131)) { + char misc[20]; + char addr[24]; + misc[0] = addr[0] = '\0'; if (high (129)) recover |= 1; if (high (125)) recover |= 2; - printk (KERN_EMERG Bank %d: %08x%08x, i, high, low); high = ~(131); if (high (127)) { rdmsr (MSR_IA32_MC0_MISC+i*4, alow, ahigh); - printk ([%08x%08x], ahigh, alow); + snprintf (misc, 20, [%08x%08x], ahigh, alow); } if (high (126)) { rdmsr (MSR_IA32_MC0_ADDR+i*4, alow, ahigh); - printk ( at %08x%08x, ahigh, alow); + snprintf (addr, 24, at %08x%08x, ahigh, alow); } - printk (\n); + printk (KERN_EMERG CPU %d: Bank %d: %08x%08x%s%s\n, + smp_processor_id(), i, high, low, misc, addr); } } Index: linux-2.6.24-rc5/arch/x86/kernel/cpu/mcheck/k7.c === --- linux-2.6.24-rc5.orig/arch/x86/kernel/cpu/mcheck/k7.c +++ linux-2.6.24-rc5/arch/x86/kernel/cpu/mcheck/k7.c @@ -33,21 +33,24 @@ static fastcall void k7_machine_check(st for (i=1