Regression in panic

2011-06-21 Thread Mandeep Singh Baines
On Tue, Jun 22, 2010 at 8:12 PM, Dave Airlie  wrote:
> From: Jesse Barnes 
>
> Jesse's initial patch commit said:
>
> "At panic time (i.e. when oops_in_progress is set) we should try a bit
> harder to update the screen and make sure output gets to the VT, since
> some drivers are capable of flipping back to it.
>
> So make sure we try to unblank and update the display if called from a
> panic context."
>
> I've enhanced this to add a flag to the vc that console layer can set
> to indicate they want this behaviour to occur. This also adds support
> to fbcon for that flag and adds an fb flag for drivers to indicate
> they want to use the support. It enables this for KMS drivers.
>
> Signed-off-by: Dave Airlie 

Hi Dave,

I think this change is causing a regression I'm seeing in panic.
Before this change, I'd get a
reboot on panic (we've configured as such).

With this change, my machine gets wedged if the machine is running in
X when the panic occurs.

I traced the code flow to this:

bust_spinlocks(0);
 ->unblank_screen();
   ->do_unblank_screen(0);
 ->vc->vc_sw->con_blank(vc, 0, 0);
   ->fbcon_blank(vc, 0, 0);
 ->update_screen(vc);
   ->redraw_screen(vc, 0);
 ->vc->vc_sw->con_switch(vc);
   ->fbcon_switch(vc);
 ->ops->update_start(info);
   ->bit_update_start(info);
->fb_pan_display(info, &ops->var);
  ->info->fbops->fb_pan_display(var, info);
->drm_fb_helper_pan_display(var, info);
  ->mutex_lock(&dev->mode_config.mutex); *this blocks*

With this change, there is now a lot going on in the panic path. Stuff
that I'm not sure is safe when panicking. In addition to the
mutex_lock, there is also a del_timer_sync()
now happening in the context of panic().

I see this bug with a 2.6.38 kernel but did a quick scan of a newer
kernels and did not see anything that changed in this path so I
suspect its still there.

Reverting this change fixes the regression.

Regards,
Mandeep

> ---
>  drivers/char/vt.c                       |   13 +
>  drivers/gpu/drm/i915/intel_fb.c         |    4 +---
>  drivers/gpu/drm/nouveau/nouveau_fbcon.c |    1 +
>  drivers/gpu/drm/radeon/radeon_fb.c      |    2 +-
>  drivers/video/console/fbcon.c           |    4 +++-
>  include/linux/console_struct.h          |    1 +
>  include/linux/fb.h                      |    4 
>  include/linux/vt_kern.h                 |    7 +++
>  8 files changed, 27 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/char/vt.c b/drivers/char/vt.c
> index 7cdb6ee..6e04c9e 100644
> --- a/drivers/char/vt.c
> +++ b/drivers/char/vt.c
> @@ -698,7 +698,10 @@ void redraw_screen(struct vc_data *vc, int is_switch)
>                        update_attr(vc);
>                        clear_buffer_attributes(vc);
>                }
> -               if (update && vc->vc_mode != KD_GRAPHICS)
> +
> +               /* Forcibly update if we're panicing */
> +               if ((update && vc->vc_mode != KD_GRAPHICS) ||
> +                   vt_force_oops_output(vc))
>                        do_update_region(vc, vc->vc_origin, 
> vc->vc_screenbuf_size / 2);
>        }
>        set_cursor(vc);
> @@ -736,6 +739,7 @@ static void visual_init(struct vc_data *vc, int num, int 
> init)
>        vc->vc_hi_font_mask = 0;
>        vc->vc_complement_mask = 0;
>        vc->vc_can_do_color = 0;
> +       vc->vc_panic_force_write = false;
>        vc->vc_sw->con_init(vc, init);
>        if (!vc->vc_complement_mask)
>                vc->vc_complement_mask = vc->vc_can_do_color ? 0x7700 : 0x0800;
> @@ -2498,7 +2502,7 @@ static void vt_console_print(struct console *co, const 
> char *b, unsigned count)
>                goto quit;
>        }
>
> -       if (vc->vc_mode != KD_TEXT)
> +       if (vc->vc_mode != KD_TEXT && !vt_force_oops_output(vc))
>                goto quit;
>
>        /* undraw cursor first */
> @@ -3703,7 +3707,8 @@ void do_unblank_screen(int leaving_gfx)
>                return;
>        }
>        vc = vc_cons[fg_console].d;
> -       if (vc->vc_mode != KD_TEXT)
> +       /* Try to unblank in oops case too */
> +       if (vc->vc_mode != KD_TEXT && !vt_force_oops_output(vc))
>                return; /* but leave console_blanked != 0 */
>
>        if (blankinterval) {
> @@ -3712,7 +3717,7 @@ void do_unblank_screen(int leaving_gfx)
>        }
>
>        console_blanked = 0;
> -       if (vc->vc_sw->con_blank(vc, 0, leaving_gfx))
> +       if (vc->vc_sw->con_blank(vc, 0, leaving_gfx) || 
> vt_force_oops_output(vc))
>                /* Low-level driver cannot restore -> do it ourselves */
>                update_screen(vc);
>        if (console_blank_hook)
> diff --git a/drivers/gpu/drm/i915/intel_fb.c b/drivers/gpu/drm/i915/intel_fb.c
> index c3c5052..bd5d87a 100644
> --- a/drivers/gpu/drm/i915/intel_fb.c
> +++ b/drivers/gpu/drm/i915/intel_fb.c
> @@ -128,7 +128,7 @@ static int intelfb_create(s

Re: Regression in panic

2011-06-21 Thread Mandeep Singh Baines
On Mon, Jun 20, 2011 at 4:03 PM, David Rientjes  wrote:
> On Mon, 20 Jun 2011, Mandeep Singh Baines wrote:
>
>> Hi Dave,
>>
>> I think this change is causing a regression I'm seeing in panic.
>> Before this change, I'd get a
>> reboot on panic (we've configured as such).
>>
>> With this change, my machine gets wedged if the machine is running in
>> X when the panic occurs.
>>
>> I traced the code flow to this:
>>
>> bust_spinlocks(0);
>>  ->unblank_screen();
>>    ->do_unblank_screen(0);
>>      ->vc->vc_sw->con_blank(vc, 0, 0);
>>        ->fbcon_blank(vc, 0, 0);
>>          ->update_screen(vc);
>>            ->redraw_screen(vc, 0);
>>              ->vc->vc_sw->con_switch(vc);
>>                ->fbcon_switch(vc);
>>                  ->ops->update_start(info);
>>                    ->bit_update_start(info);
>>                     ->fb_pan_display(info, &ops->var);
>>                       ->info->fbops->fb_pan_display(var, info);
>>                         ->drm_fb_helper_pan_display(var, info);
>>                           ->mutex_lock(&dev->mode_config.mutex); *this 
>> blocks*
>>
>> With this change, there is now a lot going on in the panic path. Stuff
>> that I'm not sure is safe when panicking. In addition to the
>> mutex_lock, there is also a del_timer_sync()
>> now happening in the context of panic().
>>
>> I see this bug with a 2.6.38 kernel but did a quick scan of a newer
>> kernels and did not see anything that changed in this path so I
>> suspect its still there.
>>
>> Reverting this change fixes the regression.
>>
>
> Chris Fowler reports something similar when running 2.6.38 by inducing a
> kernel panic via the oom killer -- see
> http://marc.info/?l=linux-kernel&m=130805985022791.  I've added him to the
> cc so he can participate in the thread and cherry-pick any fixes (last
> status update was that he was going to be trying 2.6.38.8).
>

One potential fix might be to convert the mutex_lock to a try if
oops_in_progress but
I suspect oops_in_progress checks may be needed in a bunch of other places in
the screen_unblank code path.

--
EditLive Enterprise is the world's most technically advanced content
authoring tool. Experience the power of Track Changes, Inline Image
Editing and ensure content is compliant with Accessibility Checking.
http://p.sf.net/sfu/ephox-dev2dev
--
___
Dri-devel mailing list
dri-de...@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel


Re: Regression in panic

2011-06-20 Thread David Rientjes
On Mon, 20 Jun 2011, Mandeep Singh Baines wrote:

> Hi Dave,
> 
> I think this change is causing a regression I'm seeing in panic.
> Before this change, I'd get a
> reboot on panic (we've configured as such).
> 
> With this change, my machine gets wedged if the machine is running in
> X when the panic occurs.
> 
> I traced the code flow to this:
> 
> bust_spinlocks(0);
>  ->unblank_screen();
>->do_unblank_screen(0);
>  ->vc->vc_sw->con_blank(vc, 0, 0);
>->fbcon_blank(vc, 0, 0);
>  ->update_screen(vc);
>->redraw_screen(vc, 0);
>  ->vc->vc_sw->con_switch(vc);
>->fbcon_switch(vc);
>  ->ops->update_start(info);
>->bit_update_start(info);
> ->fb_pan_display(info, &ops->var);
>   ->info->fbops->fb_pan_display(var, info);
> ->drm_fb_helper_pan_display(var, info);
>   ->mutex_lock(&dev->mode_config.mutex); *this blocks*
> 
> With this change, there is now a lot going on in the panic path. Stuff
> that I'm not sure is safe when panicking. In addition to the
> mutex_lock, there is also a del_timer_sync()
> now happening in the context of panic().
> 
> I see this bug with a 2.6.38 kernel but did a quick scan of a newer
> kernels and did not see anything that changed in this path so I
> suspect its still there.
> 
> Reverting this change fixes the regression.
> 

Chris Fowler reports something similar when running 2.6.38 by inducing a 
kernel panic via the oom killer -- see 
http://marc.info/?l=linux-kernel&m=130805985022791.  I've added him to the 
cc so he can participate in the thread and cherry-pick any fixes (last 
status update was that he was going to be trying 2.6.38.8).

--
EditLive Enterprise is the world's most technically advanced content
authoring tool. Experience the power of Track Changes, Inline Image
Editing and ensure content is compliant with Accessibility Checking.
http://p.sf.net/sfu/ephox-dev2dev
--
___
Dri-devel mailing list
dri-de...@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel
___
dri-devel mailing list
dri-devel@lists.freedesktop.org
http://lists.freedesktop.org/mailman/listinfo/dri-devel


Regression in panic

2011-06-20 Thread Mandeep Singh Baines
On Mon, Jun 20, 2011 at 4:03 PM, David Rientjes  wrote:
> On Mon, 20 Jun 2011, Mandeep Singh Baines wrote:
>
>> Hi Dave,
>>
>> I think this change is causing a regression I'm seeing in panic.
>> Before this change, I'd get a
>> reboot on panic (we've configured as such).
>>
>> With this change, my machine gets wedged if the machine is running in
>> X when the panic occurs.
>>
>> I traced the code flow to this:
>>
>> bust_spinlocks(0);
>> ?->unblank_screen();
>> ? ?->do_unblank_screen(0);
>> ? ? ?->vc->vc_sw->con_blank(vc, 0, 0);
>> ? ? ? ?->fbcon_blank(vc, 0, 0);
>> ? ? ? ? ?->update_screen(vc);
>> ? ? ? ? ? ?->redraw_screen(vc, 0);
>> ? ? ? ? ? ? ?->vc->vc_sw->con_switch(vc);
>> ? ? ? ? ? ? ? ?->fbcon_switch(vc);
>> ? ? ? ? ? ? ? ? ?->ops->update_start(info);
>> ? ? ? ? ? ? ? ? ? ?->bit_update_start(info);
>> ? ? ? ? ? ? ? ? ? ? ->fb_pan_display(info, &ops->var);
>> ? ? ? ? ? ? ? ? ? ? ? ->info->fbops->fb_pan_display(var, info);
>> ? ? ? ? ? ? ? ? ? ? ? ? ->drm_fb_helper_pan_display(var, info);
>> ? ? ? ? ? ? ? ? ? ? ? ? ? ->mutex_lock(&dev->mode_config.mutex); *this 
>> blocks*
>>
>> With this change, there is now a lot going on in the panic path. Stuff
>> that I'm not sure is safe when panicking. In addition to the
>> mutex_lock, there is also a del_timer_sync()
>> now happening in the context of panic().
>>
>> I see this bug with a 2.6.38 kernel but did a quick scan of a newer
>> kernels and did not see anything that changed in this path so I
>> suspect its still there.
>>
>> Reverting this change fixes the regression.
>>
>
> Chris Fowler reports something similar when running 2.6.38 by inducing a
> kernel panic via the oom killer -- see
> http://marc.info/?l=linux-kernel&m=130805985022791. ?I've added him to the
> cc so he can participate in the thread and cherry-pick any fixes (last
> status update was that he was going to be trying 2.6.38.8).
>

One potential fix might be to convert the mutex_lock to a try if
oops_in_progress but
I suspect oops_in_progress checks may be needed in a bunch of other places in
the screen_unblank code path.

--
EditLive Enterprise is the world's most technically advanced content
authoring tool. Experience the power of Track Changes, Inline Image
Editing and ensure content is compliant with Accessibility Checking.
http://p.sf.net/sfu/ephox-dev2dev
--
___
Dri-devel mailing list
Dri-devel at lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


Regression in panic

2011-06-20 Thread David Rientjes
On Mon, 20 Jun 2011, Mandeep Singh Baines wrote:

> Hi Dave,
> 
> I think this change is causing a regression I'm seeing in panic.
> Before this change, I'd get a
> reboot on panic (we've configured as such).
> 
> With this change, my machine gets wedged if the machine is running in
> X when the panic occurs.
> 
> I traced the code flow to this:
> 
> bust_spinlocks(0);
>  ->unblank_screen();
>->do_unblank_screen(0);
>  ->vc->vc_sw->con_blank(vc, 0, 0);
>->fbcon_blank(vc, 0, 0);
>  ->update_screen(vc);
>->redraw_screen(vc, 0);
>  ->vc->vc_sw->con_switch(vc);
>->fbcon_switch(vc);
>  ->ops->update_start(info);
>->bit_update_start(info);
> ->fb_pan_display(info, &ops->var);
>   ->info->fbops->fb_pan_display(var, info);
> ->drm_fb_helper_pan_display(var, info);
>   ->mutex_lock(&dev->mode_config.mutex); *this blocks*
> 
> With this change, there is now a lot going on in the panic path. Stuff
> that I'm not sure is safe when panicking. In addition to the
> mutex_lock, there is also a del_timer_sync()
> now happening in the context of panic().
> 
> I see this bug with a 2.6.38 kernel but did a quick scan of a newer
> kernels and did not see anything that changed in this path so I
> suspect its still there.
> 
> Reverting this change fixes the regression.
> 

Chris Fowler reports something similar when running 2.6.38 by inducing a 
kernel panic via the oom killer -- see 
http://marc.info/?l=linux-kernel&m=130805985022791.  I've added him to the 
cc so he can participate in the thread and cherry-pick any fixes (last 
status update was that he was going to be trying 2.6.38.8).

--
EditLive Enterprise is the world's most technically advanced content
authoring tool. Experience the power of Track Changes, Inline Image
Editing and ensure content is compliant with Accessibility Checking.
http://p.sf.net/sfu/ephox-dev2dev
--
___
Dri-devel mailing list
Dri-devel at lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/dri-devel


Regression in panic

2011-06-20 Thread Mandeep Singh Baines
On Tue, Jun 22, 2010 at 8:12 PM, Dave Airlie  wrote:
> From: Jesse Barnes 
>
> Jesse's initial patch commit said:
>
> "At panic time (i.e. when oops_in_progress is set) we should try a bit
> harder to update the screen and make sure output gets to the VT, since
> some drivers are capable of flipping back to it.
>
> So make sure we try to unblank and update the display if called from a
> panic context."
>
> I've enhanced this to add a flag to the vc that console layer can set
> to indicate they want this behaviour to occur. This also adds support
> to fbcon for that flag and adds an fb flag for drivers to indicate
> they want to use the support. It enables this for KMS drivers.
>
> Signed-off-by: Dave Airlie 

Hi Dave,

I think this change is causing a regression I'm seeing in panic.
Before this change, I'd get a
reboot on panic (we've configured as such).

With this change, my machine gets wedged if the machine is running in
X when the panic occurs.

I traced the code flow to this:

bust_spinlocks(0);
 ->unblank_screen();
   ->do_unblank_screen(0);
 ->vc->vc_sw->con_blank(vc, 0, 0);
   ->fbcon_blank(vc, 0, 0);
 ->update_screen(vc);
   ->redraw_screen(vc, 0);
 ->vc->vc_sw->con_switch(vc);
   ->fbcon_switch(vc);
 ->ops->update_start(info);
   ->bit_update_start(info);
->fb_pan_display(info, &ops->var);
  ->info->fbops->fb_pan_display(var, info);
->drm_fb_helper_pan_display(var, info);
  ->mutex_lock(&dev->mode_config.mutex); *this blocks*

With this change, there is now a lot going on in the panic path. Stuff
that I'm not sure is safe when panicking. In addition to the
mutex_lock, there is also a del_timer_sync()
now happening in the context of panic().

I see this bug with a 2.6.38 kernel but did a quick scan of a newer
kernels and did not see anything that changed in this path so I
suspect its still there.

Reverting this change fixes the regression.

Regards,
Mandeep

> ---
> ?drivers/char/vt.c ? ? ? ? ? ? ? ? ? ? ? | ? 13 +
> ?drivers/gpu/drm/i915/intel_fb.c ? ? ? ? | ? ?4 +---
> ?drivers/gpu/drm/nouveau/nouveau_fbcon.c | ? ?1 +
> ?drivers/gpu/drm/radeon/radeon_fb.c ? ? ?| ? ?2 +-
> ?drivers/video/console/fbcon.c ? ? ? ? ? | ? ?4 +++-
> ?include/linux/console_struct.h ? ? ? ? ?| ? ?1 +
> ?include/linux/fb.h ? ? ? ? ? ? ? ? ? ? ?| ? ?4 
> ?include/linux/vt_kern.h ? ? ? ? ? ? ? ? | ? ?7 +++
> ?8 files changed, 27 insertions(+), 9 deletions(-)
>
> diff --git a/drivers/char/vt.c b/drivers/char/vt.c
> index 7cdb6ee..6e04c9e 100644
> --- a/drivers/char/vt.c
> +++ b/drivers/char/vt.c
> @@ -698,7 +698,10 @@ void redraw_screen(struct vc_data *vc, int is_switch)
> ? ? ? ? ? ? ? ? ? ? ? ?update_attr(vc);
> ? ? ? ? ? ? ? ? ? ? ? ?clear_buffer_attributes(vc);
> ? ? ? ? ? ? ? ?}
> - ? ? ? ? ? ? ? if (update && vc->vc_mode != KD_GRAPHICS)
> +
> + ? ? ? ? ? ? ? /* Forcibly update if we're panicing */
> + ? ? ? ? ? ? ? if ((update && vc->vc_mode != KD_GRAPHICS) ||
> + ? ? ? ? ? ? ? ? ? vt_force_oops_output(vc))
> ? ? ? ? ? ? ? ? ? ? ? ?do_update_region(vc, vc->vc_origin, 
> vc->vc_screenbuf_size / 2);
> ? ? ? ?}
> ? ? ? ?set_cursor(vc);
> @@ -736,6 +739,7 @@ static void visual_init(struct vc_data *vc, int num, int 
> init)
> ? ? ? ?vc->vc_hi_font_mask = 0;
> ? ? ? ?vc->vc_complement_mask = 0;
> ? ? ? ?vc->vc_can_do_color = 0;
> + ? ? ? vc->vc_panic_force_write = false;
> ? ? ? ?vc->vc_sw->con_init(vc, init);
> ? ? ? ?if (!vc->vc_complement_mask)
> ? ? ? ? ? ? ? ?vc->vc_complement_mask = vc->vc_can_do_color ? 0x7700 : 0x0800;
> @@ -2498,7 +2502,7 @@ static void vt_console_print(struct console *co, const 
> char *b, unsigned count)
> ? ? ? ? ? ? ? ?goto quit;
> ? ? ? ?}
>
> - ? ? ? if (vc->vc_mode != KD_TEXT)
> + ? ? ? if (vc->vc_mode != KD_TEXT && !vt_force_oops_output(vc))
> ? ? ? ? ? ? ? ?goto quit;
>
> ? ? ? ?/* undraw cursor first */
> @@ -3703,7 +3707,8 @@ void do_unblank_screen(int leaving_gfx)
> ? ? ? ? ? ? ? ?return;
> ? ? ? ?}
> ? ? ? ?vc = vc_cons[fg_console].d;
> - ? ? ? if (vc->vc_mode != KD_TEXT)
> + ? ? ? /* Try to unblank in oops case too */
> + ? ? ? if (vc->vc_mode != KD_TEXT && !vt_force_oops_output(vc))
> ? ? ? ? ? ? ? ?return; /* but leave console_blanked != 0 */
>
> ? ? ? ?if (blankinterval) {
> @@ -3712,7 +3717,7 @@ void do_unblank_screen(int leaving_gfx)
> ? ? ? ?}
>
> ? ? ? ?console_blanked = 0;
> - ? ? ? if (vc->vc_sw->con_blank(vc, 0, leaving_gfx))
> + ? ? ? if (vc->vc_sw->con_blank(vc, 0, leaving_gfx) || 
> vt_force_oops_output(vc))
> ? ? ? ? ? ? ? ?/* Low-level driver cannot restore -> do it ourselves */
> ? ? ? ? ? ? ? ?update_screen(vc);
> ? ? ? ?if (console_blank_hook)
> diff --git a/drivers/gpu/drm/i915/intel_fb.c b/drivers/gpu/drm/i915/intel_fb.c
> index c3c5052..bd5d87a 100644
> --- a/drivers/gpu/drm/i915/intel_fb.c
> +++ b/drivers/gpu/drm/i915/intel_fb.c
> @@ -128,7 +128,7 @@ static int intelfb_create(s