[ https://issues.apache.org/jira/browse/SPARK-24901?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Apache Spark reassigned SPARK-24901: ------------------------------------ Assignee: Apache Spark > Merge the codegen of RegularHashMap and fastHashMap to reduce compiler > maxCodesize when VectorizedHashMap is false > ------------------------------------------------------------------------------------------------------------------ > > Key: SPARK-24901 > URL: https://issues.apache.org/jira/browse/SPARK-24901 > Project: Spark > Issue Type: Improvement > Components: SQL > Affects Versions: 2.4.0 > Reporter: caoxuewen > Assignee: Apache Spark > Priority: Major > > Currently, the generated code that updates the UnsafeRow in hash aggregation > is emitted twice: once for FastHashMap and once for RegularHashMap. These two separate > copies are needed only when VectorizedHashMap is true; in all other cases we can merge > them together to reduce the compiler maxCodesize. Thanks. > case class DistinctAgg(a: Int, b: Float, c: Double, d: Int, e: String) > spark.sparkContext.parallelize( > DistinctAgg(8, 2, 3, 4, "a") :: > DistinctAgg(9, 3, 4, 5, "b") > ::Nil).toDF().createOrReplaceTempView("distinctAgg") > val df = sql("select a,b,e, min(d) as mind, min(case when a > 10 then a else > null end) as mincasea, min(a) as mina from distinctAgg group by a, b, e") > println(org.apache.spark.sql.execution.debug.codegenString(df.queryExecution.executedPlan)) > df.show() > The generated code looks like: > *Before modified:* > Generated code: > /* 001 */ public Object generate(Object[] references) { > /* 002 */ return new GeneratedIteratorForCodegenStage1(references); > /* 003 */ } > /* 004 */ > ............... > /* 354 */ > /* 355 */ if (agg_fastAggBuffer_0 != null) { > /* 356 */ // common sub-expressions > /* 357 */ > /* 358 */ // evaluate aggregate function > /* 359 */ agg_agg_isNull_31_0 = true; > /* 360 */ int agg_value_34 = -1; > /* 361 */ > /* 362 */ boolean agg_isNull_32 = agg_fastAggBuffer_0.isNullAt(0); > /* 363 */ int agg_value_35 = agg_isNull_32 ? 
> /* 364 */ -1 : (agg_fastAggBuffer_0.getInt(0)); > /* 365 */ > /* 366 */ if (!agg_isNull_32 && (agg_agg_isNull_31_0 || > /* 367 */ agg_value_34 > agg_value_35)) { > /* 368 */ agg_agg_isNull_31_0 = false; > /* 369 */ agg_value_34 = agg_value_35; > /* 370 */ } > /* 371 */ > /* 372 */ if (!false && (agg_agg_isNull_31_0 || > /* 373 */ agg_value_34 > agg_expr_2_0)) { > /* 374 */ agg_agg_isNull_31_0 = false; > /* 375 */ agg_value_34 = agg_expr_2_0; > /* 376 */ } > /* 377 */ agg_agg_isNull_34_0 = true; > /* 378 */ int agg_value_37 = -1; > /* 379 */ > /* 380 */ boolean agg_isNull_35 = agg_fastAggBuffer_0.isNullAt(1); > /* 381 */ int agg_value_38 = agg_isNull_35 ? > /* 382 */ -1 : (agg_fastAggBuffer_0.getInt(1)); > /* 383 */ > /* 384 */ if (!agg_isNull_35 && (agg_agg_isNull_34_0 || > /* 385 */ agg_value_37 > agg_value_38)) { > /* 386 */ agg_agg_isNull_34_0 = false; > /* 387 */ agg_value_37 = agg_value_38; > /* 388 */ } > /* 389 */ > /* 390 */ byte agg_caseWhenResultState_1 = -1; > /* 391 */ do { > /* 392 */ boolean agg_value_40 = false; > /* 393 */ agg_value_40 = agg_expr_0_0 > 10; > /* 394 */ if (!false && agg_value_40) { > /* 395 */ agg_caseWhenResultState_1 = (byte)(false ? 1 : 0); > /* 396 */ agg_agg_value_39_0 = agg_expr_0_0; > /* 397 */ continue; > /* 398 */ } > /* 399 */ > /* 400 */ agg_caseWhenResultState_1 = (byte)(true ? 1 : 0); > /* 401 */ agg_agg_value_39_0 = -1; > /* 402 */ > /* 403 */ } while (false); > /* 404 */ // TRUE if any condition is met and the result is null, or no > any condition is met. 
> /* 405 */ final boolean agg_isNull_36 = (agg_caseWhenResultState_1 != > 0); > /* 406 */ > /* 407 */ if (!agg_isNull_36 && (agg_agg_isNull_34_0 || > /* 408 */ agg_value_37 > agg_agg_value_39_0)) { > /* 409 */ agg_agg_isNull_34_0 = false; > /* 410 */ agg_value_37 = agg_agg_value_39_0; > /* 411 */ } > /* 412 */ agg_agg_isNull_42_0 = true; > /* 413 */ int agg_value_45 = -1; > /* 414 */ > /* 415 */ boolean agg_isNull_43 = agg_fastAggBuffer_0.isNullAt(2); > /* 416 */ int agg_value_46 = agg_isNull_43 ? > /* 417 */ -1 : (agg_fastAggBuffer_0.getInt(2)); > /* 418 */ > /* 419 */ if (!agg_isNull_43 && (agg_agg_isNull_42_0 || > /* 420 */ agg_value_45 > agg_value_46)) { > /* 421 */ agg_agg_isNull_42_0 = false; > /* 422 */ agg_value_45 = agg_value_46; > /* 423 */ } > /* 424 */ > /* 425 */ if (!false && (agg_agg_isNull_42_0 || > /* 426 */ agg_value_45 > agg_expr_0_0)) { > /* 427 */ agg_agg_isNull_42_0 = false; > /* 428 */ agg_value_45 = agg_expr_0_0; > /* 429 */ } > /* 430 */ // update fast row > /* 431 */ agg_fastAggBuffer_0.setInt(0, agg_value_34); > /* 432 */ > /* 433 */ if (!agg_agg_isNull_34_0) { > /* 434 */ agg_fastAggBuffer_0.setInt(1, agg_value_37); > /* 435 */ } else { > /* 436 */ agg_fastAggBuffer_0.setNullAt(1); > /* 437 */ } > /* 438 */ > /* 439 */ agg_fastAggBuffer_0.setInt(2, agg_value_45); > /* 440 */ } else { > /* 441 */ // common sub-expressions > /* 442 */ > /* 443 */ // evaluate aggregate function > /* 444 */ agg_agg_isNull_17_0 = true; > /* 445 */ int agg_value_20 = -1; > /* 446 */ > /* 447 */ boolean agg_isNull_18 = agg_unsafeRowAggBuffer_0.isNullAt(0); > /* 448 */ int agg_value_21 = agg_isNull_18 ? 
> /* 449 */ -1 : (agg_unsafeRowAggBuffer_0.getInt(0)); > /* 450 */ > /* 451 */ if (!agg_isNull_18 && (agg_agg_isNull_17_0 || > /* 452 */ agg_value_20 > agg_value_21)) { > /* 453 */ agg_agg_isNull_17_0 = false; > /* 454 */ agg_value_20 = agg_value_21; > /* 455 */ } > /* 456 */ > /* 457 */ if (!false && (agg_agg_isNull_17_0 || > /* 458 */ agg_value_20 > agg_expr_2_0)) { > /* 459 */ agg_agg_isNull_17_0 = false; > /* 460 */ agg_value_20 = agg_expr_2_0; > /* 461 */ } > /* 462 */ agg_agg_isNull_20_0 = true; > /* 463 */ int agg_value_23 = -1; > /* 464 */ > /* 465 */ boolean agg_isNull_21 = agg_unsafeRowAggBuffer_0.isNullAt(1); > /* 466 */ int agg_value_24 = agg_isNull_21 ? > /* 467 */ -1 : (agg_unsafeRowAggBuffer_0.getInt(1)); > /* 468 */ > /* 469 */ if (!agg_isNull_21 && (agg_agg_isNull_20_0 || > /* 470 */ agg_value_23 > agg_value_24)) { > /* 471 */ agg_agg_isNull_20_0 = false; > /* 472 */ agg_value_23 = agg_value_24; > /* 473 */ } > /* 474 */ > /* 475 */ byte agg_caseWhenResultState_0 = -1; > /* 476 */ do { > /* 477 */ boolean agg_value_26 = false; > /* 478 */ agg_value_26 = agg_expr_0_0 > 10; > /* 479 */ if (!false && agg_value_26) { > /* 480 */ agg_caseWhenResultState_0 = (byte)(false ? 1 : 0); > /* 481 */ agg_agg_value_25_0 = agg_expr_0_0; > /* 482 */ continue; > /* 483 */ } > /* 484 */ > /* 485 */ agg_caseWhenResultState_0 = (byte)(true ? 1 : 0); > /* 486 */ agg_agg_value_25_0 = -1; > /* 487 */ > /* 488 */ } while (false); > /* 489 */ // TRUE if any condition is met and the result is null, or no > any condition is met. 
> /* 490 */ final boolean agg_isNull_22 = (agg_caseWhenResultState_0 != > 0); > /* 491 */ > /* 492 */ if (!agg_isNull_22 && (agg_agg_isNull_20_0 || > /* 493 */ agg_value_23 > agg_agg_value_25_0)) { > /* 494 */ agg_agg_isNull_20_0 = false; > /* 495 */ agg_value_23 = agg_agg_value_25_0; > /* 496 */ } > /* 497 */ agg_agg_isNull_28_0 = true; > /* 498 */ int agg_value_31 = -1; > /* 499 */ > /* 500 */ boolean agg_isNull_29 = agg_unsafeRowAggBuffer_0.isNullAt(2); > /* 501 */ int agg_value_32 = agg_isNull_29 ? > /* 502 */ -1 : (agg_unsafeRowAggBuffer_0.getInt(2)); > /* 503 */ > /* 504 */ if (!agg_isNull_29 && (agg_agg_isNull_28_0 || > /* 505 */ agg_value_31 > agg_value_32)) { > /* 506 */ agg_agg_isNull_28_0 = false; > /* 507 */ agg_value_31 = agg_value_32; > /* 508 */ } > /* 509 */ > /* 510 */ if (!false && (agg_agg_isNull_28_0 || > /* 511 */ agg_value_31 > agg_expr_0_0)) { > /* 512 */ agg_agg_isNull_28_0 = false; > /* 513 */ agg_value_31 = agg_expr_0_0; > /* 514 */ } > /* 515 */ // update unsafe row buffer > /* 516 */ agg_unsafeRowAggBuffer_0.setInt(0, agg_value_20); > /* 517 */ > /* 518 */ if (!agg_agg_isNull_20_0) { > /* 519 */ agg_unsafeRowAggBuffer_0.setInt(1, agg_value_23); > /* 520 */ } else { > /* 521 */ agg_unsafeRowAggBuffer_0.setNullAt(1); > /* 522 */ } > /* 523 */ > /* 524 */ agg_unsafeRowAggBuffer_0.setInt(2, agg_value_31); > /* 525 */ > /* 526 */ } > /* 527 */ > /* 528 */ } > ...................... 
> /* 554 */ // output the result > /* 555 */ > /* 556 */ while (agg_fastHashMapIter_0.next()) { > /* 557 */ UnsafeRow agg_aggKey_0 = (UnsafeRow) > agg_fastHashMapIter_0.getKey(); > /* 558 */ UnsafeRow agg_aggBuffer_0 = (UnsafeRow) > agg_fastHashMapIter_0.getValue(); > /* 559 */ agg_doAggregateWithKeysOutput_0(agg_aggKey_0, > agg_aggBuffer_0); > /* 560 */ > /* 561 */ if (shouldStop()) return; > /* 562 */ } > /* 563 */ agg_fastHashMap_0.close(); > /* 564 */ > /* 565 */ while (agg_mapIter_0.next()) { > /* 566 */ UnsafeRow agg_aggKey_0 = (UnsafeRow) agg_mapIter_0.getKey(); > /* 567 */ UnsafeRow agg_aggBuffer_0 = (UnsafeRow) > agg_mapIter_0.getValue(); > /* 568 */ agg_doAggregateWithKeysOutput_0(agg_aggKey_0, > agg_aggBuffer_0); > /* 569 */ > /* 570 */ if (shouldStop()) return; > /* 571 */ } > /* 572 */ > /* 573 */ agg_mapIter_0.close(); > /* 574 */ if (agg_sorter_0 == null) { > /* 575 */ agg_hashMap_0.free(); > /* 576 */ } > /* 577 */ } > /* 578 */ > /* 579 */ } > whole codegen max code size:954 > *After modified:* > Generated code: > /* 001 */ public Object generate(Object[] references) { > /* 002 */ return new GeneratedIteratorForCodegenStage1(references); > /* 003 */ } > /* 004 */ > ............. > /* 350 */ > /* 351 */ if (agg_fastAggBuffer_0 != null) { > /* 352 */ agg_unsafeRowAggBuffer_0 = agg_fastAggBuffer_0; > /* 353 */ } > /* 354 */ > /* 355 */ // common sub-expressions > /* 356 */ > /* 357 */ // evaluate aggregate function > /* 358 */ agg_agg_isNull_17_0 = true; > /* 359 */ int agg_value_20 = -1; > /* 360 */ > /* 361 */ boolean agg_isNull_18 = agg_unsafeRowAggBuffer_0.isNullAt(0); > /* 362 */ int agg_value_21 = agg_isNull_18 ? 
> /* 363 */ -1 : (agg_unsafeRowAggBuffer_0.getInt(0)); > /* 364 */ > /* 365 */ if (!agg_isNull_18 && (agg_agg_isNull_17_0 || > /* 366 */ agg_value_20 > agg_value_21)) { > /* 367 */ agg_agg_isNull_17_0 = false; > /* 368 */ agg_value_20 = agg_value_21; > /* 369 */ } > /* 370 */ > /* 371 */ if (!false && (agg_agg_isNull_17_0 || > /* 372 */ agg_value_20 > agg_expr_2_0)) { > /* 373 */ agg_agg_isNull_17_0 = false; > /* 374 */ agg_value_20 = agg_expr_2_0; > /* 375 */ } > /* 376 */ agg_agg_isNull_20_0 = true; > /* 377 */ int agg_value_23 = -1; > /* 378 */ > /* 379 */ boolean agg_isNull_21 = agg_unsafeRowAggBuffer_0.isNullAt(1); > /* 380 */ int agg_value_24 = agg_isNull_21 ? > /* 381 */ -1 : (agg_unsafeRowAggBuffer_0.getInt(1)); > /* 382 */ > /* 383 */ if (!agg_isNull_21 && (agg_agg_isNull_20_0 || > /* 384 */ agg_value_23 > agg_value_24)) { > /* 385 */ agg_agg_isNull_20_0 = false; > /* 386 */ agg_value_23 = agg_value_24; > /* 387 */ } > /* 388 */ > /* 389 */ byte agg_caseWhenResultState_0 = -1; > /* 390 */ do { > /* 391 */ boolean agg_value_26 = false; > /* 392 */ agg_value_26 = agg_expr_0_0 > 10; > /* 393 */ if (!false && agg_value_26) { > /* 394 */ agg_caseWhenResultState_0 = (byte)(false ? 1 : 0); > /* 395 */ agg_agg_value_25_0 = agg_expr_0_0; > /* 396 */ continue; > /* 397 */ } > /* 398 */ > /* 399 */ agg_caseWhenResultState_0 = (byte)(true ? 1 : 0); > /* 400 */ agg_agg_value_25_0 = -1; > /* 401 */ > /* 402 */ } while (false); > /* 403 */ // TRUE if any condition is met and the result is null, or no > any condition is met. 
> /* 404 */ final boolean agg_isNull_22 = (agg_caseWhenResultState_0 != 0); > /* 405 */ > /* 406 */ if (!agg_isNull_22 && (agg_agg_isNull_20_0 || > /* 407 */ agg_value_23 > agg_agg_value_25_0)) { > /* 408 */ agg_agg_isNull_20_0 = false; > /* 409 */ agg_value_23 = agg_agg_value_25_0; > /* 410 */ } > /* 411 */ agg_agg_isNull_28_0 = true; > /* 412 */ int agg_value_31 = -1; > /* 413 */ > /* 414 */ boolean agg_isNull_29 = agg_unsafeRowAggBuffer_0.isNullAt(2); > /* 415 */ int agg_value_32 = agg_isNull_29 ? > /* 416 */ -1 : (agg_unsafeRowAggBuffer_0.getInt(2)); > /* 417 */ > /* 418 */ if (!agg_isNull_29 && (agg_agg_isNull_28_0 || > /* 419 */ agg_value_31 > agg_value_32)) { > /* 420 */ agg_agg_isNull_28_0 = false; > /* 421 */ agg_value_31 = agg_value_32; > /* 422 */ } > /* 423 */ > /* 424 */ if (!false && (agg_agg_isNull_28_0 || > /* 425 */ agg_value_31 > agg_expr_0_0)) { > /* 426 */ agg_agg_isNull_28_0 = false; > /* 427 */ agg_value_31 = agg_expr_0_0; > /* 428 */ } > /* 429 */ // update unsafe row buffer > /* 430 */ agg_unsafeRowAggBuffer_0.setInt(0, agg_value_20); > /* 431 */ > /* 432 */ if (!agg_agg_isNull_20_0) { > /* 433 */ agg_unsafeRowAggBuffer_0.setInt(1, agg_value_23); > /* 434 */ } else { > /* 435 */ agg_unsafeRowAggBuffer_0.setNullAt(1); > /* 436 */ } > /* 437 */ > /* 438 */ agg_unsafeRowAggBuffer_0.setInt(2, agg_value_31); > /* 439 */ > /* 440 */ } > /* 441 */ > ........... 
> /* 466 */ // output the result > /* 467 */ > /* 468 */ while (agg_fastHashMapIter_0.next()) { > /* 469 */ UnsafeRow agg_aggKey_0 = (UnsafeRow) > agg_fastHashMapIter_0.getKey(); > /* 470 */ UnsafeRow agg_aggBuffer_0 = (UnsafeRow) > agg_fastHashMapIter_0.getValue(); > /* 471 */ agg_doAggregateWithKeysOutput_0(agg_aggKey_0, > agg_aggBuffer_0); > /* 472 */ > /* 473 */ if (shouldStop()) return; > /* 474 */ } > /* 475 */ agg_fastHashMap_0.close(); > /* 476 */ > /* 477 */ while (agg_mapIter_0.next()) { > /* 478 */ UnsafeRow agg_aggKey_0 = (UnsafeRow) agg_mapIter_0.getKey(); > /* 479 */ UnsafeRow agg_aggBuffer_0 = (UnsafeRow) > agg_mapIter_0.getValue(); > /* 480 */ agg_doAggregateWithKeysOutput_0(agg_aggKey_0, > agg_aggBuffer_0); > /* 481 */ > /* 482 */ if (shouldStop()) return; > /* 483 */ } > /* 484 */ > /* 485 */ agg_mapIter_0.close(); > /* 486 */ if (agg_sorter_0 == null) { > /* 487 */ agg_hashMap_0.free(); > /* 488 */ } > /* 489 */ } > /* 490 */ > /* 491 */ } > whole codegen max code size:598 -- This message was sent by Atlassian JIRA (v7.6.3#76005) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org