Index: delay.h.in
===================================================================
--- delay.h.in	(revision 2188)
+++ delay.h.in	(working copy)
@@ -109,16 +109,29 @@
    mode _delay_ms() will work with a resolution of 1/10 ms, providing
    delays up to 6.5535 seconds (independent from CPU frequency).  The
    user will not be informed about decreased resolution.
+
+   If the avr-gcc toolchain has __builtin_avr_delay_cycles(unsigned long)
+   support, the maximal possible delay is 4294967.295 ms / F_CPU in MHz.
+   For values greater than the maximal possible delay, overflow results
+   in no delay, i.e., 0 ms.
  */
 void
 _delay_ms(double __ms)
 {
 	uint16_t __ticks;
-	double __tmp = ((F_CPU) / 4e3) * __ms;
+	double __tmp;
 #if __HAS_DELAY_CYCLES && defined(__OPTIMIZE__)
+	uint32_t __ticks_dc;
 	extern void __builtin_avr_delay_cycles(unsigned long);
-	__builtin_avr_delay_cycles(__tmp);
+	__tmp = ((F_CPU) / 1e3) * __ms;
+	if (__tmp < 1.0)
+		__ticks_dc = 1;
+	else
+		__ticks_dc = (uint32_t)__tmp;
+
+	__builtin_avr_delay_cycles(__ticks_dc);
 #else
+	__tmp = ((F_CPU) / 4e3) * __ms;
 	if (__tmp < 1.0)
 		__ticks = 1;
 	else if (__tmp > 65535)
@@ -152,16 +165,29 @@
    If the user requests a delay greater than the maximal possible one,
    _delay_us() will automatically call _delay_ms() instead.  The user
    will not be informed about this case.
+
+   If the avr-gcc toolchain has __builtin_avr_delay_cycles(unsigned long)
+   support, the maximal possible delay is 4294967295 us / F_CPU in MHz.
+   For values greater than the maximal possible delay, overflow results
+   in no delay, i.e., 0 us.
  */
 void
 _delay_us(double __us)
 {
 	uint8_t __ticks;
-	double __tmp = ((F_CPU) / 3e6) * __us;
+	double __tmp;
 #if __HAS_DELAY_CYCLES && defined(__OPTIMIZE__)
+	uint32_t __ticks_dc;
 	extern void __builtin_avr_delay_cycles(unsigned long);
-	__builtin_avr_delay_cycles(__tmp);
+	__tmp = ((F_CPU) / 1e6) * __us;
+	if (__tmp < 1.0)
+		__ticks_dc = 1;
+	else
+		__ticks_dc = (uint32_t)__tmp;
+
+	__builtin_avr_delay_cycles(__ticks_dc);
 #else
+	__tmp = ((F_CPU) / 3e6) * __us;
 	if (__tmp < 1.0)
 		__ticks = 1;
 	else if (__tmp > 255)
