[v8-dev] Re: r10210 committed - Port Math.pow inlining to ARM....

Yang Guo Fri, 09 Dec 2011 03:07:22 -0800

Hi Alexandre,

thanks for notifying. I just noticed it this morning as well and already
found one of at least two bugs. The second one occurs only in crankshaft. I
think I will be able to find and fix this one pretty soon as well.


Cheers,

Yang

On Fri, Dec 9, 2011 at 11:20 AM, Alexandre Rames
<[email protected]>wrote:

> Hello,
>
> This commit breaks the v8 raytrace benchmark on ARM.
> Could you have a look? I'll have a look as well.
>
> Thanks,
>
> Alexandre
>
> On Wed, Dec 7, 2011 at 5:09 PM,  <[email protected]> wrote:
> > Revision: 10210
> > Author:   [email protected]
> > Date:     Wed Dec  7 08:55:00 2011
> > Log:      Port Math.pow inlining to ARM.
> >
> > TEST=math-pow.js
> >
> > Review URL: http://codereview.chromium.org/8840008
> > http://code.google.com/p/v8/source/detail?r=10210
> >
> > Modified:
> >  /branches/bleeding_edge/src/arm/code-stubs-arm.cc
> >  /branches/bleeding_edge/src/arm/full-codegen-arm.cc
> >  /branches/bleeding_edge/src/arm/lithium-arm.cc
> >  /branches/bleeding_edge/src/arm/lithium-codegen-arm.cc
> >
> > =======================================
> > --- /branches/bleeding_edge/src/arm/code-stubs-arm.cc   Tue Nov 29
> 04:09:06
> > 2011
> > +++ /branches/bleeding_edge/src/arm/code-stubs-arm.cc   Wed Dec  7
> 08:55:00
> > 2011
> > @@ -3455,110 +3455,198 @@
> >
> >
> >  void MathPowStub::Generate(MacroAssembler* masm) {
> > -  Label call_runtime;
> > -
> > -  if (CpuFeatures::IsSupported(VFP3)) {
> > -    CpuFeatures::Scope scope(VFP3);
> > -
> > -    Label base_not_smi;
> > -    Label exponent_not_smi;
> > -    Label convert_exponent;
> > -
> > -    const Register base = r0;
> > -    const Register exponent = r1;
> > -    const Register heapnumbermap = r5;
> > -    const Register heapnumber = r6;
> > -    const DoubleRegister double_base = d0;
> > -    const DoubleRegister double_exponent = d1;
> > -    const DoubleRegister double_result = d2;
> > -    const SwVfpRegister single_scratch = s0;
> > -    const Register scratch = r9;
> > -    const Register scratch2 = r7;
> > -
> > -    __ LoadRoot(heapnumbermap, Heap::kHeapNumberMapRootIndex);
> > +  CpuFeatures::Scope vfp3_scope(VFP3);
> > +  const Register base = r1;
> > +  const Register exponent = r2;
> > +  const Register heapnumbermap = r5;
> > +  const Register heapnumber = r0;
> > +  const DoubleRegister double_base = d1;
> > +  const DoubleRegister double_exponent = d2;
> > +  const DoubleRegister double_result = d3;
> > +  const DoubleRegister double_scratch = d0;
> > +  const SwVfpRegister single_scratch = s0;
> > +  const Register scratch = r9;
> > +  const Register scratch2 = r7;
> > +
> > +  Label call_runtime, done, exponent_not_smi, int_exponent;
> > +  if (exponent_type_ == ON_STACK) {
> > +    Label base_is_smi, unpack_exponent;
> > +    // The exponent and base are supplied as arguments on the stack.
> > +    // This can only happen if the stub is called from non-optimized
> code.
> > +    // Load input parameters from stack to double registers.
> >     __ ldr(base, MemOperand(sp, 1 * kPointerSize));
> >     __ ldr(exponent, MemOperand(sp, 0 * kPointerSize));
> >
> > -    // Convert base to double value and store it in d0.
> > -    __ JumpIfNotSmi(base, &base_not_smi);
> > -    // Base is a Smi. Untag and convert it.
> > -    __ SmiUntag(base);
> > -    __ vmov(single_scratch, base);
> > -    __ vcvt_f64_s32(double_base, single_scratch);
> > -    __ b(&convert_exponent);
> > -
> > -    __ bind(&base_not_smi);
> > +    __ LoadRoot(heapnumbermap, Heap::kHeapNumberMapRootIndex);
> > +
> > +    __ JumpIfSmi(base, &base_is_smi);
> >     __ ldr(scratch, FieldMemOperand(base, JSObject::kMapOffset));
> >     __ cmp(scratch, heapnumbermap);
> >     __ b(ne, &call_runtime);
> > -    // Base is a heapnumber. Load it into double register.
> > +
> >     __ vldr(double_base, FieldMemOperand(base,
> HeapNumber::kValueOffset));
> > -
> > -    __ bind(&convert_exponent);
> > +    __ jmp(&unpack_exponent);
> > +
> > +    __ bind(&base_is_smi);
> > +    __ SmiUntag(base);
> > +    __ vmov(single_scratch, base);
> > +    __ vcvt_f64_s32(double_base, single_scratch);
> > +    __ bind(&unpack_exponent);
> > +
> >     __ JumpIfNotSmi(exponent, &exponent_not_smi);
> >     __ SmiUntag(exponent);
> > -
> > -    // The base is in a double register and the exponent is
> > -    // an untagged smi. Allocate a heap number and call a
> > -    // C function for integer exponents. The register containing
> > -    // the heap number is callee-saved.
> > -    __ AllocateHeapNumber(heapnumber,
> > -                          scratch,
> > -                          scratch2,
> > -                          heapnumbermap,
> > -                          &call_runtime);
> > -    __ push(lr);
> > -    __ PrepareCallCFunction(1, 1, scratch);
> > -    __ SetCallCDoubleArguments(double_base, exponent);
> > -    {
> > -      AllowExternalCallThatCantCauseGC scope(masm);
> > -      __ CallCFunction(
> > -          ExternalReference::power_double_int_function(masm->isolate()),
> > -          1, 1);
> > -      __ pop(lr);
> > -      __ GetCFunctionDoubleResult(double_result);
> > -    }
> > -    __ vstr(double_result,
> > -            FieldMemOperand(heapnumber, HeapNumber::kValueOffset));
> > -    __ mov(r0, heapnumber);
> > -    __ Ret(2 * kPointerSize);
> > +    __ jmp(&int_exponent);
> >
> >     __ bind(&exponent_not_smi);
> >     __ ldr(scratch, FieldMemOperand(exponent, JSObject::kMapOffset));
> >     __ cmp(scratch, heapnumbermap);
> >     __ b(ne, &call_runtime);
> > -    // Exponent is a heapnumber. Load it into double register.
> >     __ vldr(double_exponent,
> >             FieldMemOperand(exponent, HeapNumber::kValueOffset));
> > -
> > -    // The base and the exponent are in double registers.
> > -    // Allocate a heap number and call a C function for
> > -    // double exponents. The register containing
> > -    // the heap number is callee-saved.
> > -    __ AllocateHeapNumber(heapnumber,
> > -                          scratch,
> > -                          scratch2,
> > -                          heapnumbermap,
> > -                          &call_runtime);
> > +  } else if (exponent_type_ == TAGGED) {
> > +    // Base is already in double_base.
> > +    __ JumpIfNotSmi(exponent, &exponent_not_smi);
> > +    __ SmiUntag(exponent);
> > +    __ jmp(&int_exponent);
> > +
> > +    __ bind(&exponent_not_smi);
> > +    __ vldr(double_exponent,
> > +            FieldMemOperand(exponent, HeapNumber::kValueOffset));
> > +  }
> > +
> > +  if (exponent_type_ != INTEGER) {
> > +    // Detect integer exponents stored as double.
> > +    __ vcvt_u32_f64(single_scratch, double_exponent);
> > +    // We do not check for NaN or Infinity here because comparing
> numbers
> > on
> > +    // ARM correctly distinguishes NaNs.  We end up calling the
> built-in.
> > +    __ vcvt_f64_u32(double_scratch, single_scratch);
> > +    __ VFPCompareAndSetFlags(double_scratch, double_exponent);
> > +    __ vmov(exponent, single_scratch, eq);
> > +    __ b(eq, &int_exponent);
> > +
> > +    if (exponent_type_ == ON_STACK) {
> > +      // Detect square root case.  Crankshaft detects constant +/-0.5 at
> > +      // compile time and uses DoMathPowHalf instead.  We then skip this
> > check
> > +      // for non-constant cases of +/-0.5 as these hardly occur.
> > +      Label not_plus_half;
> > +
> > +      // Test for 0.5.
> > +      __ vmov(double_scratch, 0.5);
> > +      __ VFPCompareAndSetFlags(double_exponent, double_scratch);
> > +      __ b(ne, &not_plus_half);
> > +
> > +      // Calculates square root of base.  Check for the special case of
> > +      // Math.pow(-Infinity, 0.5) == Infinity (ECMA spec, 15.8.2.13).
> > +      __ vmov(double_scratch, -V8_INFINITY);
> > +      __ VFPCompareAndSetFlags(double_base, double_scratch);
> > +      __ vneg(double_result, double_scratch, eq);
> > +      __ b(eq, &done);
> > +
> > +      // Add +0 to convert -0 to +0.
> > +      __ vadd(double_scratch, double_base, kDoubleRegZero);
> > +      __ vsqrt(double_result, double_scratch);
> > +      __ jmp(&done);
> > +
> > +      __ bind(&not_plus_half);
> > +      __ vmov(double_scratch, -0.5);
> > +      __ VFPCompareAndSetFlags(double_exponent, double_scratch);
> > +      __ b(ne, &call_runtime);
> > +
> > +      // Calculates square root of base.  Check for the special case of
> > +      // Math.pow(-Infinity, -0.5) == 0 (ECMA spec, 15.8.2.13).
> > +      __ vmov(double_scratch, -V8_INFINITY);
> > +      __ VFPCompareAndSetFlags(double_base, double_scratch);
> > +      __ vmov(double_result, kDoubleRegZero, eq);
> > +      __ b(eq, &done);
> > +
> > +      // Add +0 to convert -0 to +0.
> > +      __ vadd(double_scratch, double_base, kDoubleRegZero);
> > +      __ vmov(double_result, 1);
> > +      __ vsqrt(double_scratch, double_scratch);
> > +      __ vdiv(double_result, double_result, double_scratch);
> > +      __ jmp(&done);
> > +    }
> > +
> >     __ push(lr);
> > -    __ PrepareCallCFunction(0, 2, scratch);
> > -    __ SetCallCDoubleArguments(double_base, double_exponent);
> >     {
> >       AllowExternalCallThatCantCauseGC scope(masm);
> > +      __ PrepareCallCFunction(0, 2, scratch);
> > +      __ SetCallCDoubleArguments(double_base, double_exponent);
> >       __ CallCFunction(
> >
> ExternalReference::power_double_double_function(masm->isolate()),
> >           0, 2);
> > -      __ pop(lr);
> > -      __ GetCFunctionDoubleResult(double_result);
> > -    }
> > +    }
> > +    __ pop(lr);
> > +    __ GetCFunctionDoubleResult(double_result);
> > +    __ jmp(&done);
> > +  }
> > +
> > +  // Calculate power with integer exponent.
> > +  __ bind(&int_exponent);
> > +
> > +  __ mov(scratch, exponent);  // Back up exponent.
> > +  __ vmov(double_scratch, double_base);  // Back up base.
> > +  __ vmov(double_result, 1.0);
> > +
> > +  // Get absolute value of exponent.
> > +  __ cmp(scratch, Operand(0));
> > +  __ mov(scratch2, Operand(0), LeaveCC, mi);
> > +  __ sub(scratch, scratch2, scratch, LeaveCC, mi);
> > +
> > +  Label while_true;
> > +  __ bind(&while_true);
> > +  __ mov(scratch, Operand(scratch, ASR, 1), SetCC);
> > +  __ vmul(double_result, double_result, double_scratch, cs);
> > +  __ vmul(double_scratch, double_scratch, double_scratch, ne);
> > +  __ b(ne, &while_true);
> > +
> > +  __ cmp(exponent, Operand(0));
> > +  __ b(ge, &done);
> > +  __ vmov(double_scratch, 1.0);
> > +  __ vdiv(double_result, double_scratch, double_result);
> > +  // Test whether result is zero.  Bail out to check for subnormal
> result.
> > +  // Due to subnormals, x^-y == (1/x)^y does not hold in all cases.
> > +  __ VFPCompareAndSetFlags(double_result, 0.0);
> > +  __ b(ne, &done);
> > +  // double_exponent may not containe the exponent value if the input
> was a
> > +  // smi.  We set it with exponent value before bailing out.
> > +  __ vmov(single_scratch, exponent);
> > +  __ vcvt_f64_s32(double_exponent, single_scratch);
> > +
> > +  // Returning or bailing out.
> > +  Counters* counters = masm->isolate()->counters();
> > +  if (exponent_type_ == ON_STACK) {
> > +    // The arguments are still on the stack.
> > +    __ bind(&call_runtime);
> > +    __ TailCallRuntime(Runtime::kMath_pow_cfunction, 2, 1);
> > +
> > +    // The stub is called from non-optimized code, which expects the
> result
> > +    // as heap number in exponent.
> > +    __ bind(&done);
> > +    __ AllocateHeapNumber(
> > +        heapnumber, scratch, scratch2, heapnumbermap, &call_runtime);
> >     __ vstr(double_result,
> >             FieldMemOperand(heapnumber, HeapNumber::kValueOffset));
> > -    __ mov(r0, heapnumber);
> > +    ASSERT(heapnumber.is(r0));
> > +    __ IncrementCounter(counters->math_pow(), 1, scratch, scratch2);
> >     __ Ret(2 * kPointerSize);
> > -  }
> > -
> > -  __ bind(&call_runtime);
> > -  __ TailCallRuntime(Runtime::kMath_pow_cfunction, 2, 1);
> > +  } else {
> > +    __ push(lr);
> > +    {
> > +      AllowExternalCallThatCantCauseGC scope(masm);
> > +      __ PrepareCallCFunction(0, 2, scratch);
> > +      __ SetCallCDoubleArguments(double_base, double_exponent);
> > +      __ CallCFunction(
> > +
>  ExternalReference::power_double_double_function(masm->isolate()),
> > +          0, 2);
> > +    }
> > +    __ pop(lr);
> > +    __ GetCFunctionDoubleResult(double_result);
> > +
> > +    __ bind(&done);
> > +    __ IncrementCounter(counters->math_pow(), 1, scratch, scratch2);
> > +    __ Ret();
> > +  }
> >  }
> >
> >
> > =======================================
> > --- /branches/bleeding_edge/src/arm/full-codegen-arm.cc Fri Dec  2
> 00:06:37
> > 2011
> > +++ /branches/bleeding_edge/src/arm/full-codegen-arm.cc Wed Dec  7
> 08:55:00
> > 2011
> > @@ -2938,8 +2938,12 @@
> >   ASSERT(args->length() == 2);
> >   VisitForStackValue(args->at(0));
> >   VisitForStackValue(args->at(1));
> > -  MathPowStub stub(MathPowStub::ON_STACK);
> > -  __ CallStub(&stub);
> > +  if (CpuFeatures::IsSupported(VFP3)) {
> > +    MathPowStub stub(MathPowStub::ON_STACK);
> > +    __ CallStub(&stub);
> > +  } else {
> > +    __ CallRuntime(Runtime::kMath_pow, 2);
> > +  }
> >   context()->Plug(r0);
> >  }
> >
> > =======================================
> > --- /branches/bleeding_edge/src/arm/lithium-arm.cc      Tue Dec  6
> 01:37:50
> > 2011
> > +++ /branches/bleeding_edge/src/arm/lithium-arm.cc      Wed Dec  7
> 08:55:00
> > 2011
> > @@ -1405,7 +1405,7 @@
> >   LOperand* left = UseFixedDouble(instr->left(), d1);
> >   LOperand* right = exponent_type.IsDouble() ?
> >       UseFixedDouble(instr->right(), d2) :
> > -      UseFixed(instr->right(), r0);
> > +      UseFixed(instr->right(), r2);
> >   LPower* result = new LPower(left, right);
> >   return MarkAsCall(DefineFixedDouble(result, d3),
> >                     instr,
> > =======================================
> > --- /branches/bleeding_edge/src/arm/lithium-codegen-arm.cc      Wed Dec
>  7
> > 02:13:46 2011
> > +++ /branches/bleeding_edge/src/arm/lithium-codegen-arm.cc      Wed Dec
>  7
> > 08:55:00 2011
> > @@ -3122,61 +3122,34 @@
> >
> >
> >  void LCodeGen::DoPower(LPower* instr) {
> > -  LOperand* left = instr->InputAt(0);
> > -  LOperand* right = instr->InputAt(1);
> > -  Register scratch = scratch0();
> > -  DoubleRegister result_reg = ToDoubleRegister(instr->result());
> >   Representation exponent_type =
> > instr->hydrogen()->right()->representation();
> > -  if (exponent_type.IsDouble()) {
> > -    // Prepare arguments and call C function.
> > -    __ PrepareCallCFunction(0, 2, scratch);
> > -    __ SetCallCDoubleArguments(ToDoubleRegister(left),
> > -                               ToDoubleRegister(right));
> > -    __ CallCFunction(
> > -        ExternalReference::power_double_double_function(isolate()), 0,
> 2);
> > -  } else if (exponent_type.IsInteger32()) {
> > -    ASSERT(ToRegister(right).is(r0));
> > -    // Prepare arguments and call C function.
> > -    __ PrepareCallCFunction(1, 1, scratch);
> > -    __ SetCallCDoubleArguments(ToDoubleRegister(left),
> ToRegister(right));
> > -    __ CallCFunction(
> > -        ExternalReference::power_double_int_function(isolate()), 1, 1);
> > -  } else {
> > -    ASSERT(exponent_type.IsTagged());
> > -    ASSERT(instr->hydrogen()->left()->representation().IsDouble());
> > -
> > -    Register right_reg = ToRegister(right);
> > -
> > -    // Check for smi on the right hand side.
> > -    Label non_smi, call;
> > -    __ JumpIfNotSmi(right_reg, &non_smi);
> > -
> > -    // Untag smi and convert it to a double.
> > -    __ SmiUntag(right_reg);
> > -    SwVfpRegister single_scratch = double_scratch0().low();
> > -    __ vmov(single_scratch, right_reg);
> > -    __ vcvt_f64_s32(result_reg, single_scratch);
> > -    __ jmp(&call);
> > -
> > -    // Heap number map check.
> > -    __ bind(&non_smi);
> > -    __ ldr(scratch, FieldMemOperand(right_reg, HeapObject::kMapOffset));
> > +  // Having marked this as a call, we can use any registers.
> > +  // Just make sure that the input/output registers are the expected
> ones.
> > +  ASSERT(!instr->InputAt(1)->IsDoubleRegister() ||
> > +         ToDoubleRegister(instr->InputAt(1)).is(d2));
> > +  ASSERT(!instr->InputAt(1)->IsRegister() ||
> > +         ToRegister(instr->InputAt(1)).is(r2));
> > +  ASSERT(ToDoubleRegister(instr->InputAt(0)).is(d1));
> > +  ASSERT(ToDoubleRegister(instr->result()).is(d3));
> > +
> > +  if (exponent_type.IsTagged()) {
> > +    Label no_deopt;
> > +    __ JumpIfSmi(r2, &no_deopt);
> > +    __ ldr(r7, FieldMemOperand(r2, HeapObject::kMapOffset));
> >     __ LoadRoot(ip, Heap::kHeapNumberMapRootIndex);
> > -    __ cmp(scratch, Operand(ip));
> > +    __ cmp(r7, Operand(ip));
> >     DeoptimizeIf(ne, instr->environment());
> > -    int32_t value_offset = HeapNumber::kValueOffset - kHeapObjectTag;
> > -    __ add(scratch, right_reg, Operand(value_offset));
> > -    __ vldr(result_reg, scratch, 0);
> > -
> > -    // Prepare arguments and call C function.
> > -    __ bind(&call);
> > -    __ PrepareCallCFunction(0, 2, scratch);
> > -    __ SetCallCDoubleArguments(ToDoubleRegister(left), result_reg);
> > -    __ CallCFunction(
> > -        ExternalReference::power_double_double_function(isolate()), 0,
> 2);
> > -  }
> > -  // Store the result in the result register.
> > -  __ GetCFunctionDoubleResult(result_reg);
> > +    __ bind(&no_deopt);
> > +    MathPowStub stub(MathPowStub::TAGGED);
> > +    __ CallStub(&stub);
> > +  } else if (exponent_type.IsInteger32()) {
> > +    MathPowStub stub(MathPowStub::INTEGER);
> > +    __ CallStub(&stub);
> > +  } else {
> > +    ASSERT(exponent_type.IsDouble());
> > +    MathPowStub stub(MathPowStub::DOUBLE);
> > +    __ CallStub(&stub);
> > +  }
> >  }
> >
> >
> > --
> > v8-dev mailing list
> > [email protected]
> > http://groups.google.com/group/v8-dev
>

-- 
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev

[v8-dev] Re: r10210 committed - Port Math.pow inlining to ARM....

Reply via email to