[ https://issues.apache.org/jira/browse/SPARK-31714?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17108018#comment-17108018 ]
zhengruifeng edited comment on SPARK-31714 at 5/15/20, 7:33 AM: ---------------------------------------------------------------- additionally test on impl of gemv: {code:java} test("performance: gemv vs while-gemv") { def whileGemv(mat: DenseMatrix, vec: DenseVector): DenseVector = { require(!mat.isTransposed) val m = mat.numRows val n = mat.numCols require(vec.size == n) val matValues = mat.values val vecValues = vec.values val output = Array.ofDim[Double](m) var i = 0 var j = 0 while (j < n) { val startIdx = m * j val v = vecValues(j) i = 0 while (i < m) { output(i) += matValues(startIdx + i) * v i += 1 } j += 1 } new DenseVector(output) } val shapeBuffer = mutable.ArrayBuilder.make[String]() val ratioBuffer = mutable.ArrayBuilder.make[Double]() for (numRows <- Seq(16, 64, 256, 1024, 4096); numCols <- Seq(16, 64, 256, 1024, 4096)) { val rng = new Random(123) val matrix = Matrices.dense(numRows, numCols, Array.fill(numRows * numCols)(rng.nextDouble)).toDense val vectors = matrix.rowIter.toArray val coefVec = Vectors.dense(Array.fill(numCols)(rng.nextDouble)).toDense val coefArr = coefVec.toArray val start1 = System.nanoTime Seq.range(0, 100).foreach { _ => matrix.multiply(coefVec) } val dur1 = System.nanoTime - start1 val start2 = System.nanoTime Seq.range(0, 100).foreach { _ => whileGemv(matrix, coefVec) } val dur2 = System.nanoTime - start2 shapeBuffer += s"$numRows X $numCols" ratioBuffer += dur1 / dur2.toDouble println(s"numRows=$numRows, numCols=$numCols, gemv: $dur1, whileGemv: $dur2, " + s"gemv/whileGemv: ${dur1.toDouble / dur2}") } println(s"shapes: ${shapeBuffer.result().mkString(",")}") println(s"ratios: ${ratioBuffer.result().mkString(",")}") } {code} duration of BLAS(openblas) : BLAS(java) : java vectorization(whileGemv) Smaller is better 16 X 16: 10.102879222350534 : 9.959393672790585 : 1 16 X 64: 0.6313347039650034 : 1.5069314081915879 : 1 16 X 256: 0.910207085544699 : 1.6170097903436782 : 1 16 X 1024: 0.14076750751831094 : 1.6376539758035005 : 1 16 
X 4096: 0.3820137565286111 : 1.4649140200740003 : 1 64 X 16: 2.8673586429725364 : 1.969880827023684 : 1 64 X 64: 1.0055741530692275 : 1.3026339290803859 : 1 64 X 256: 0.5070096449300102 : 1.2295682324328647 : 1 64 X 1024: 0.3274242265593191 : 1.2509151212941314 : 1 64 X 4096: 0.3128853980795693 : 1.2300961378942419 : 1 256 X 16: 0.706246615744421 : 1.2293631722237384 : 1 256 X 64: 0.4953318665588364 : 1.0788036857858834 : 1 256 X 256: 0.3683838887701576 : 1.1598682179753397 : 1 256 X 1024: 0.310782477418242 : 1.1286869048387194 : 1 256 X 4096: 0.5179985507534923 : 1.113165303546807 : 1 1024 X 16: 0.5854246295743595 : 1.2117608900770562 : 1 1024 X 64: 0.4417717319177173 : 1.1725839824047304 : 1 1024 X 256: 0.3816961486090574 : 1.1040280425824138 : 1 1024 X 1024: 0.4209589414251511 : 1.1066541963615741 : 1 1024 X 4096: 0.5353395921250336 : 1.078501530540412 : 1 4096 X 16: 0.5745857849841409 : 1.1618592112098773 : 1 4096 X 64: 0.47592411311765476 : 1.129909923930711 : 1 4096 X 256: 0.4784264781542997 : 1.1055266185525001 : 1 4096 X 1024: 0.5531428334840445 : 1.0856731602285508 : 1 4096 X 4096: 0.592064493623388 : 1.060620615275768 : 1 was (Author: podongfeng): additionally test on impl of gemv: {code:java} test("performance: gemv vs while-gemv") { def whileGemv(mat: DenseMatrix, vec: DenseVector): DenseVector = { require(!mat.isTransposed) val m = mat.numRows val n = mat.numCols require(vec.size == n) val matValues = mat.values val vecValues = vec.values val output = Array.ofDim[Double](m) var i = 0 var j = 0 while (j < n) { val startIdx = m * j val v = vecValues(j) i = 0 while (i < m) { output(i) += matValues(startIdx + i) * v i += 1 } j += 1 } new DenseVector(output) } val shapeBuffer = mutable.ArrayBuilder.make[String]() val ratioBuffer = mutable.ArrayBuilder.make[Double]() for (numRows <- Seq(16, 64, 256, 1024, 4096); numCols <- Seq(16, 64, 256, 1024, 4096)) { val rng = new Random(123) val matrix = Matrices.dense(numRows, numCols, Array.fill(numRows * 
numCols)(rng.nextDouble)).toDense val vectors = matrix.rowIter.toArray val coefVec = Vectors.dense(Array.fill(numCols)(rng.nextDouble)).toDense val coefArr = coefVec.toArray val start1 = System.nanoTime Seq.range(0, 100).foreach { _ => matrix.multiply(coefVec) } val dur1 = System.nanoTime - start1 val start2 = System.nanoTime Seq.range(0, 100).foreach { _ => whileGemv(matrix, coefVec) } val dur2 = System.nanoTime - start2 shapeBuffer += s"$numRows X $numCols" ratioBuffer += dur1 / dur2.toDouble println(s"numRows=$numRows, numCols=$numCols, gemv: $dur1, whileGemv: $dur2, " + s"gemv/whileGemv: ${dur1.toDouble / dur2}") } println(s"shapes: ${shapeBuffer.result().mkString(",")}") println(s"ratios: ${ratioBuffer.result().mkString(",")}") } {code} duration of BLAS(openblas), BLAS(java), java vectorization(whileGemv) 16 X 16: 10.102879222350534 : 9.959393672790585 : 1 16 X 64: 0.6313347039650034 : 1.5069314081915879 : 1 16 X 256: 0.910207085544699 : 1.6170097903436782 : 1 16 X 1024: 0.14076750751831094 : 1.6376539758035005 : 1 16 X 4096: 0.3820137565286111 : 1.4649140200740003 : 1 64 X 16: 2.8673586429725364 : 1.969880827023684 : 1 64 X 64: 1.0055741530692275 : 1.3026339290803859 : 1 64 X 256: 0.5070096449300102 : 1.2295682324328647 : 1 64 X 1024: 0.3274242265593191 : 1.2509151212941314 : 1 64 X 4096: 0.3128853980795693 : 1.2300961378942419 : 1 256 X 16: 0.706246615744421 : 1.2293631722237384 : 1 256 X 64: 0.4953318665588364 : 1.0788036857858834 : 1 256 X 256: 0.3683838887701576 : 1.1598682179753397 : 1 256 X 1024: 0.310782477418242 : 1.1286869048387194 : 1 256 X 4096: 0.5179985507534923 : 1.113165303546807 : 1 1024 X 16: 0.5854246295743595 : 1.2117608900770562 : 1 1024 X 64: 0.4417717319177173 : 1.1725839824047304 : 1 1024 X 256: 0.3816961486090574 : 1.1040280425824138 : 1 1024 X 1024: 0.4209589414251511 : 1.1066541963615741 : 1 1024 X 4096: 0.5353395921250336 : 1.078501530540412 : 1 4096 X 16: 0.5745857849841409 : 1.1618592112098773 : 1 4096 X 64: 0.47592411311765476 : 
1.129909923930711 : 1 4096 X 256: 0.4784264781542997 : 1.1055266185525001 : 1 4096 X 1024: 0.5531428334840445 : 1.0856731602285508 : 1 4096 X 4096: 0.592064493623388 : 1.060620615275768 : 1 > Performance test on java vectorization vs dot vs gemv vs gemm > ------------------------------------------------------------- > > Key: SPARK-31714 > URL: https://issues.apache.org/jira/browse/SPARK-31714 > Project: Spark > Issue Type: Sub-task > Components: ML > Affects Versions: 3.1.0 > Reporter: zhengruifeng > Assignee: zhengruifeng > Priority: Minor > Attachments: BLASSuite.scala, blas-perf > > -- This message was sent by Atlassian Jira (v8.3.4#803005) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org For additional commands, e-mail: issues-help@spark.apache.org