[ https://issues.apache.org/jira/browse/SPARK-31714?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17108018#comment-17108018 ]
zhengruifeng edited comment on SPARK-31714 at 5/15/20, 7:33 AM: ---------------------------------------------------------------- additionally test on impl of gemv: {code:java} test("performance: gemv vs while-gemv") { def whileGemv(mat: DenseMatrix, vec: DenseVector): DenseVector = { require(!mat.isTransposed) val m = mat.numRows val n = mat.numCols require(vec.size == n) val matValues = mat.values val vecValues = vec.values val output = Array.ofDim[Double](m) var i = 0 var j = 0 while (j < n) { val startIdx = m * j val v = vecValues(j) i = 0 while (i < m) { output(i) += matValues(startIdx + i) * v i += 1 } j += 1 } new DenseVector(output) } val shapeBuffer = mutable.ArrayBuilder.make[String]() val ratioBuffer = mutable.ArrayBuilder.make[Double]() for (numRows <- Seq(16, 64, 256, 1024, 4096); numCols <- Seq(16, 64, 256, 1024, 4096)) { val rng = new Random(123) val matrix = Matrices.dense(numRows, numCols, Array.fill(numRows * numCols)(rng.nextDouble)).toDense val vectors = matrix.rowIter.toArray val coefVec = Vectors.dense(Array.fill(numCols)(rng.nextDouble)).toDense val coefArr = coefVec.toArray val start1 = System.nanoTime Seq.range(0, 100).foreach { _ => matrix.multiply(coefVec) } val dur1 = System.nanoTime - start1 val start2 = System.nanoTime Seq.range(0, 100).foreach { _ => whileGemv(matrix, coefVec) } val dur2 = System.nanoTime - start2 shapeBuffer += s"$numRows X $numCols" ratioBuffer += dur1 / dur2.toDouble println(s"numRows=$numRows, numCols=$numCols, gemv: $dur1, whileGemv: $dur2, " + s"gemv/whileGemv: ${dur1.toDouble / dur2}") } println(s"shapes: ${shapeBuffer.result().mkString(",")}") println(s"ratios: ${ratioBuffer.result().mkString(",")}") } {code} duration of BLAS(openblas) : BLAS(java) : java vectorization(whileGemv) Smaller is better 16 X 16: 10.102879222350534 : 9.959393672790585 : 1 16 X 64: 0.6313347039650034 : 1.5069314081915879 : 1 16 X 256: 0.910207085544699 : 1.6170097903436782 : 1 16 X 1024: 0.14076750751831094 : 1.6376539758035005 : 1 16 
X 4096: 0.3820137565286111 : 1.4649140200740003 : 1 64 X 16: 2.8673586429725364 : 1.969880827023684 : 1 64 X 64: 1.0055741530692275 : 1.3026339290803859 : 1 64 X 256: 0.5070096449300102 : 1.2295682324328647 : 1 64 X 1024: 0.3274242265593191 : 1.2509151212941314 : 1 64 X 4096: 0.3128853980795693 : 1.2300961378942419 : 1 256 X 16: 0.706246615744421 : 1.2293631722237384 : 1 256 X 64: 0.4953318665588364 : 1.0788036857858834 : 1 256 X 256: 0.3683838887701576 : 1.1598682179753397 : 1 256 X 1024: 0.310782477418242 : 1.1286869048387194 : 1 256 X 4096: 0.5179985507534923 : 1.113165303546807 : 1 1024 X 16: 0.5854246295743595 : 1.2117608900770562 : 1 1024 X 64: 0.4417717319177173 : 1.1725839824047304 : 1 1024 X 256: 0.3816961486090574 : 1.1040280425824138 : 1 1024 X 1024: 0.4209589414251511 : 1.1066541963615741 : 1 1024 X 4096: 0.5353395921250336 : 1.078501530540412 : 1 4096 X 16: 0.5745857849841409 : 1.1618592112098773 : 1 4096 X 64: 0.47592411311765476 : 1.129909923930711 : 1 4096 X 256: 0.4784264781542997 : 1.1055266185525001 : 1 4096 X 1024: 0.5531428334840445 : 1.0856731602285508 : 1 4096 X 4096: 0.592064493623388 : 1.060620615275768 : 1 was (Author: podongfeng): additionally test on impl of gemv: {code:java} test("performance: gemv vs while-gemv") { def whileGemv(mat: DenseMatrix, vec: DenseVector): DenseVector = { require(!mat.isTransposed) val m = mat.numRows val n = mat.numCols require(vec.size == n) val matValues = mat.values val vecValues = vec.values val output = Array.ofDim[Double](m) var i = 0 var j = 0 while (j < n) { val startIdx = m * j val v = vecValues(j) i = 0 while (i < m) { output(i) += matValues(startIdx + i) * v i += 1 } j += 1 } new DenseVector(output) } val shapeBuffer = mutable.ArrayBuilder.make[String]() val ratioBuffer = mutable.ArrayBuilder.make[Double]() for (numRows <- Seq(16, 64, 256, 1024, 4096); numCols <- Seq(16, 64, 256, 1024, 4096)) { val rng = new Random(123) val matrix = Matrices.dense(numRows, numCols, Array.fill(numRows * 
numCols)(rng.nextDouble)).toDense val vectors = matrix.rowIter.toArray val coefVec = Vectors.dense(Array.fill(numCols)(rng.nextDouble)).toDense val coefArr = coefVec.toArray val start1 = System.nanoTime Seq.range(0, 100).foreach { _ => matrix.multiply(coefVec) } val dur1 = System.nanoTime - start1 val start2 = System.nanoTime Seq.range(0, 100).foreach { _ => whileGemv(matrix, coefVec) } val dur2 = System.nanoTime - start2 shapeBuffer += s"$numRows X $numCols" ratioBuffer += dur1 / dur2.toDouble println(s"numRows=$numRows, numCols=$numCols, gemv: $dur1, whileGemv: $dur2, " + s"gemv/whileGemv: ${dur1.toDouble / dur2}") } println(s"shapes: ${shapeBuffer.result().mkString(",")}") println(s"ratios: ${ratioBuffer.result().mkString(",")}") } {code} duration of BLAS(openblas), BLAS(java), java vectorization(whileGemv) 16 X 16: 10.102879222350534 : 9.959393672790585 : 1 16 X 64: 0.6313347039650034 : 1.5069314081915879 : 1 16 X 256: 0.910207085544699 : 1.6170097903436782 : 1 16 X 1024: 0.14076750751831094 : 1.6376539758035005 : 1 16 X 4096: 0.3820137565286111 : 1.4649140200740003 : 1 64 X 16: 2.8673586429725364 : 1.969880827023684 : 1 64 X 64: 1.0055741530692275 : 1.3026339290803859 : 1 64 X 256: 0.5070096449300102 : 1.2295682324328647 : 1 64 X 1024: 0.3274242265593191 : 1.2509151212941314 : 1 64 X 4096: 0.3128853980795693 : 1.2300961378942419 : 1 256 X 16: 0.706246615744421 : 1.2293631722237384 : 1 256 X 64: 0.4953318665588364 : 1.0788036857858834 : 1 256 X 256: 0.3683838887701576 : 1.1598682179753397 : 1 256 X 1024: 0.310782477418242 : 1.1286869048387194 : 1 256 X 4096: 0.5179985507534923 : 1.113165303546807 : 1 1024 X 16: 0.5854246295743595 : 1.2117608900770562 : 1 1024 X 64: 0.4417717319177173 : 1.1725839824047304 : 1 1024 X 256: 0.3816961486090574 : 1.1040280425824138 : 1 1024 X 1024: 0.4209589414251511 : 1.1066541963615741 : 1 1024 X 4096: 0.5353395921250336 : 1.078501530540412 : 1 4096 X 16: 0.5745857849841409 : 1.1618592112098773 : 1 4096 X 64: 0.47592411311765476 : 
1.129909923930711 : 1 4096 X 256: 0.4784264781542997 : 1.1055266185525001 : 1 4096 X 1024: 0.5531428334840445 : 1.0856731602285508 : 1 4096 X 4096: 0.592064493623388 : 1.060620615275768 : 1 > Performance test on java vectorization vs dot vs gemv vs gemm > ------------------------------------------------------------- > > Key: SPARK-31714 > URL: https://issues.apache.org/jira/browse/SPARK-31714 > Project: Spark > Issue Type: Sub-task > Components: ML > Affects Versions: 3.1.0 > Reporter: zhengruifeng > Assignee: zhengruifeng > Priority: Minor > Attachments: BLASSuite.scala, blas-perf > > -- This message was sent by Atlassian Jira (v8.3.4#803005) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org For additional commands, e-mail: issues-help@spark.apache.org