GitHub user heary-cao opened a pull request:

    https://github.com/apache/spark/pull/21860

    [SPARK-24901][SQL]Merge the codegen of RegularHashMap and fastHashMap to 
reduce compiler maxCodesize when VectorizedHashMap is false.

    ## What changes were proposed in this pull request?
    
    Currently, the generated code that updates the UnsafeRow aggregation 
buffer in hash aggregation is emitted twice: once for the FastHashMap and once 
for the RegularHashMap. These two separate copies are only needed when 
VectorizedHashMap is true; in all other cases we can merge them into a single 
copy to reduce the compiler maxCodesize. Thanks.
    case class DistinctAgg(a: Int, b: Float, c: Double, d: Int, e: String)
    spark.sparkContext.parallelize(
          DistinctAgg(8, 2, 3, 4, "a") ::
          DistinctAgg(9, 3, 4, 5, "b") 
::Nil).toDF()createOrReplaceTempView("distinctAgg")
    val df = sql("select a,b,e, min(d) as mind, min(case when a > 10 then a 
else null end) as mincasea, min(a) as mina from distinctAgg group by a, b, e")
    
println(org.apache.spark.sql.execution.debug.codegenString(df.queryExecution.executedPlan))
    df.show()
    
    The generated code looks like this:
    Before the change:
    Generated code:
    /* 001 */ public Object generate(Object[] references) {
    /* 002 */   return new GeneratedIteratorForCodegenStage1(references);
    /* 003 */ }
    /* 004 */
    ...............
    /* 354 */
    /* 355 */     if (agg_fastAggBuffer_0 != null) {
    /* 356 */       // common sub-expressions
    /* 357 */
    /* 358 */       // evaluate aggregate function
    /* 359 */       agg_agg_isNull_31_0 = true;
    /* 360 */       int agg_value_34 = -1;
    /* 361 */
    /* 362 */       boolean agg_isNull_32 = agg_fastAggBuffer_0.isNullAt(0);
    /* 363 */       int agg_value_35 = agg_isNull_32 ?
    /* 364 */       -1 : (agg_fastAggBuffer_0.getInt(0));
    /* 365 */
    /* 366 */       if (!agg_isNull_32 && (agg_agg_isNull_31_0 ||
    /* 367 */           agg_value_34 > agg_value_35)) {
    /* 368 */         agg_agg_isNull_31_0 = false;
    /* 369 */         agg_value_34 = agg_value_35;
    /* 370 */       }
    /* 371 */
    /* 372 */       if (!false && (agg_agg_isNull_31_0 ||
    /* 373 */           agg_value_34 > agg_expr_2_0)) {
    /* 374 */         agg_agg_isNull_31_0 = false;
    /* 375 */         agg_value_34 = agg_expr_2_0;
    /* 376 */       }
    /* 377 */       agg_agg_isNull_34_0 = true;
    /* 378 */       int agg_value_37 = -1;
    /* 379 */
    /* 380 */       boolean agg_isNull_35 = agg_fastAggBuffer_0.isNullAt(1);
    /* 381 */       int agg_value_38 = agg_isNull_35 ?
    /* 382 */       -1 : (agg_fastAggBuffer_0.getInt(1));
    /* 383 */
    /* 384 */       if (!agg_isNull_35 && (agg_agg_isNull_34_0 ||
    /* 385 */           agg_value_37 > agg_value_38)) {
    /* 386 */         agg_agg_isNull_34_0 = false;
    /* 387 */         agg_value_37 = agg_value_38;
    /* 388 */       }
    /* 389 */
    /* 390 */       byte agg_caseWhenResultState_1 = -1;
    /* 391 */       do {
    /* 392 */         boolean agg_value_40 = false;
    /* 393 */         agg_value_40 = agg_expr_0_0 > 10;
    /* 394 */         if (!false && agg_value_40) {
    /* 395 */           agg_caseWhenResultState_1 = (byte)(false ? 1 : 0);
    /* 396 */           agg_agg_value_39_0 = agg_expr_0_0;
    /* 397 */           continue;
    /* 398 */         }
    /* 399 */
    /* 400 */         agg_caseWhenResultState_1 = (byte)(true ? 1 : 0);
    /* 401 */         agg_agg_value_39_0 = -1;
    /* 402 */
    /* 403 */       } while (false);
    /* 404 */       // TRUE if any condition is met and the result is null, or 
no any condition is met.
    /* 405 */       final boolean agg_isNull_36 = (agg_caseWhenResultState_1 != 
0);
    /* 406 */
    /* 407 */       if (!agg_isNull_36 && (agg_agg_isNull_34_0 ||
    /* 408 */           agg_value_37 > agg_agg_value_39_0)) {
    /* 409 */         agg_agg_isNull_34_0 = false;
    /* 410 */         agg_value_37 = agg_agg_value_39_0;
    /* 411 */       }
    /* 412 */       agg_agg_isNull_42_0 = true;
    /* 413 */       int agg_value_45 = -1;
    /* 414 */
    /* 415 */       boolean agg_isNull_43 = agg_fastAggBuffer_0.isNullAt(2);
    /* 416 */       int agg_value_46 = agg_isNull_43 ?
    /* 417 */       -1 : (agg_fastAggBuffer_0.getInt(2));
    /* 418 */
    /* 419 */       if (!agg_isNull_43 && (agg_agg_isNull_42_0 ||
    /* 420 */           agg_value_45 > agg_value_46)) {
    /* 421 */         agg_agg_isNull_42_0 = false;
    /* 422 */         agg_value_45 = agg_value_46;
    /* 423 */       }
    /* 424 */
    /* 425 */       if (!false && (agg_agg_isNull_42_0 ||
    /* 426 */           agg_value_45 > agg_expr_0_0)) {
    /* 427 */         agg_agg_isNull_42_0 = false;
    /* 428 */         agg_value_45 = agg_expr_0_0;
    /* 429 */       }
    /* 430 */       // update fast row
    /* 431 */       agg_fastAggBuffer_0.setInt(0, agg_value_34);
    /* 432 */
    /* 433 */       if (!agg_agg_isNull_34_0) {
    /* 434 */         agg_fastAggBuffer_0.setInt(1, agg_value_37);
    /* 435 */       } else {
    /* 436 */         agg_fastAggBuffer_0.setNullAt(1);
    /* 437 */       }
    /* 438 */
    /* 439 */       agg_fastAggBuffer_0.setInt(2, agg_value_45);
    /* 440 */     } else {
    /* 441 */       // common sub-expressions
    /* 442 */
    /* 443 */       // evaluate aggregate function
    /* 444 */       agg_agg_isNull_17_0 = true;
    /* 445 */       int agg_value_20 = -1;
    /* 446 */
    /* 447 */       boolean agg_isNull_18 = 
agg_unsafeRowAggBuffer_0.isNullAt(0);
    /* 448 */       int agg_value_21 = agg_isNull_18 ?
    /* 449 */       -1 : (agg_unsafeRowAggBuffer_0.getInt(0));
    /* 450 */
    /* 451 */       if (!agg_isNull_18 && (agg_agg_isNull_17_0 ||
    /* 452 */           agg_value_20 > agg_value_21)) {
    /* 453 */         agg_agg_isNull_17_0 = false;
    /* 454 */         agg_value_20 = agg_value_21;
    /* 455 */       }
    /* 456 */
    /* 457 */       if (!false && (agg_agg_isNull_17_0 ||
    /* 458 */           agg_value_20 > agg_expr_2_0)) {
    /* 459 */         agg_agg_isNull_17_0 = false;
    /* 460 */         agg_value_20 = agg_expr_2_0;
    /* 461 */       }
    /* 462 */       agg_agg_isNull_20_0 = true;
    /* 463 */       int agg_value_23 = -1;
    /* 464 */
    /* 465 */       boolean agg_isNull_21 = 
agg_unsafeRowAggBuffer_0.isNullAt(1);
    /* 466 */       int agg_value_24 = agg_isNull_21 ?
    /* 467 */       -1 : (agg_unsafeRowAggBuffer_0.getInt(1));
    /* 468 */
    /* 469 */       if (!agg_isNull_21 && (agg_agg_isNull_20_0 ||
    /* 470 */           agg_value_23 > agg_value_24)) {
    /* 471 */         agg_agg_isNull_20_0 = false;
    /* 472 */         agg_value_23 = agg_value_24;
    /* 473 */       }
    /* 474 */
    /* 475 */       byte agg_caseWhenResultState_0 = -1;
    /* 476 */       do {
    /* 477 */         boolean agg_value_26 = false;
    /* 478 */         agg_value_26 = agg_expr_0_0 > 10;
    /* 479 */         if (!false && agg_value_26) {
    /* 480 */           agg_caseWhenResultState_0 = (byte)(false ? 1 : 0);
    /* 481 */           agg_agg_value_25_0 = agg_expr_0_0;
    /* 482 */           continue;
    /* 483 */         }
    /* 484 */
    /* 485 */         agg_caseWhenResultState_0 = (byte)(true ? 1 : 0);
    /* 486 */         agg_agg_value_25_0 = -1;
    /* 487 */
    /* 488 */       } while (false);
    /* 489 */       // TRUE if any condition is met and the result is null, or 
no any condition is met.
    /* 490 */       final boolean agg_isNull_22 = (agg_caseWhenResultState_0 != 
0);
    /* 491 */
    /* 492 */       if (!agg_isNull_22 && (agg_agg_isNull_20_0 ||
    /* 493 */           agg_value_23 > agg_agg_value_25_0)) {
    /* 494 */         agg_agg_isNull_20_0 = false;
    /* 495 */         agg_value_23 = agg_agg_value_25_0;
    /* 496 */       }
    /* 497 */       agg_agg_isNull_28_0 = true;
    /* 498 */       int agg_value_31 = -1;
    /* 499 */
    /* 500 */       boolean agg_isNull_29 = 
agg_unsafeRowAggBuffer_0.isNullAt(2);
    /* 501 */       int agg_value_32 = agg_isNull_29 ?
    /* 502 */       -1 : (agg_unsafeRowAggBuffer_0.getInt(2));
    /* 503 */
    /* 504 */       if (!agg_isNull_29 && (agg_agg_isNull_28_0 ||
    /* 505 */           agg_value_31 > agg_value_32)) {
    /* 506 */         agg_agg_isNull_28_0 = false;
    /* 507 */         agg_value_31 = agg_value_32;
    /* 508 */       }
    /* 509 */
    /* 510 */       if (!false && (agg_agg_isNull_28_0 ||
    /* 511 */           agg_value_31 > agg_expr_0_0)) {
    /* 512 */         agg_agg_isNull_28_0 = false;
    /* 513 */         agg_value_31 = agg_expr_0_0;
    /* 514 */       }
    /* 515 */       // update unsafe row buffer
    /* 516 */       agg_unsafeRowAggBuffer_0.setInt(0, agg_value_20);
    /* 517 */
    /* 518 */       if (!agg_agg_isNull_20_0) {
    /* 519 */         agg_unsafeRowAggBuffer_0.setInt(1, agg_value_23);
    /* 520 */       } else {
    /* 521 */         agg_unsafeRowAggBuffer_0.setNullAt(1);
    /* 522 */       }
    /* 523 */
    /* 524 */       agg_unsafeRowAggBuffer_0.setInt(2, agg_value_31);
    /* 525 */
    /* 526 */     }
    /* 527 */
    /* 528 */   }
    ......................
    /* 554 */     // output the result
    /* 555 */
    /* 556 */     while (agg_fastHashMapIter_0.next()) {
    /* 557 */       UnsafeRow agg_aggKey_0 = (UnsafeRow) 
agg_fastHashMapIter_0.getKey();
    /* 558 */       UnsafeRow agg_aggBuffer_0 = (UnsafeRow) 
agg_fastHashMapIter_0.getValue();
    /* 559 */       agg_doAggregateWithKeysOutput_0(agg_aggKey_0, 
agg_aggBuffer_0);
    /* 560 */
    /* 561 */       if (shouldStop()) return;
    /* 562 */     }
    /* 563 */     agg_fastHashMap_0.close();
    /* 564 */
    /* 565 */     while (agg_mapIter_0.next()) {
    /* 566 */       UnsafeRow agg_aggKey_0 = (UnsafeRow) agg_mapIter_0.getKey();
    /* 567 */       UnsafeRow agg_aggBuffer_0 = (UnsafeRow) 
agg_mapIter_0.getValue();
    /* 568 */       agg_doAggregateWithKeysOutput_0(agg_aggKey_0, 
agg_aggBuffer_0);
    /* 569 */
    /* 570 */       if (shouldStop()) return;
    /* 571 */     }
    /* 572 */
    /* 573 */     agg_mapIter_0.close();
    /* 574 */     if (agg_sorter_0 == null) {
    /* 575 */       agg_hashMap_0.free();
    /* 576 */     }
    /* 577 */   }
    /* 578 */
    /* 579 */ }
    
    whole codegen max code size:954
    
    After the change:
    Generated code:
    /* 001 */ public Object generate(Object[] references) {
    /* 002 */   return new GeneratedIteratorForCodegenStage1(references);
    /* 003 */ }
    /* 004 */
    .............
    /* 350 */
    /* 351 */     if (agg_fastAggBuffer_0 != null) {
    /* 352 */       agg_unsafeRowAggBuffer_0 = agg_fastAggBuffer_0;
    /* 353 */     }
    /* 354 */
    /* 355 */     // common sub-expressions
    /* 356 */
    /* 357 */     // evaluate aggregate function
    /* 358 */     agg_agg_isNull_17_0 = true;
    /* 359 */     int agg_value_20 = -1;
    /* 360 */
    /* 361 */     boolean agg_isNull_18 = agg_unsafeRowAggBuffer_0.isNullAt(0);
    /* 362 */     int agg_value_21 = agg_isNull_18 ?
    /* 363 */     -1 : (agg_unsafeRowAggBuffer_0.getInt(0));
    /* 364 */
    /* 365 */     if (!agg_isNull_18 && (agg_agg_isNull_17_0 ||
    /* 366 */         agg_value_20 > agg_value_21)) {
    /* 367 */       agg_agg_isNull_17_0 = false;
    /* 368 */       agg_value_20 = agg_value_21;
    /* 369 */     }
    /* 370 */
    /* 371 */     if (!false && (agg_agg_isNull_17_0 ||
    /* 372 */         agg_value_20 > agg_expr_2_0)) {
    /* 373 */       agg_agg_isNull_17_0 = false;
    /* 374 */       agg_value_20 = agg_expr_2_0;
    /* 375 */     }
    /* 376 */     agg_agg_isNull_20_0 = true;
    /* 377 */     int agg_value_23 = -1;
    /* 378 */
    /* 379 */     boolean agg_isNull_21 = agg_unsafeRowAggBuffer_0.isNullAt(1);
    /* 380 */     int agg_value_24 = agg_isNull_21 ?
    /* 381 */     -1 : (agg_unsafeRowAggBuffer_0.getInt(1));
    /* 382 */
    /* 383 */     if (!agg_isNull_21 && (agg_agg_isNull_20_0 ||
    /* 384 */         agg_value_23 > agg_value_24)) {
    /* 385 */       agg_agg_isNull_20_0 = false;
    /* 386 */       agg_value_23 = agg_value_24;
    /* 387 */     }
    /* 388 */
    /* 389 */     byte agg_caseWhenResultState_0 = -1;
    /* 390 */     do {
    /* 391 */       boolean agg_value_26 = false;
    /* 392 */       agg_value_26 = agg_expr_0_0 > 10;
    /* 393 */       if (!false && agg_value_26) {
    /* 394 */         agg_caseWhenResultState_0 = (byte)(false ? 1 : 0);
    /* 395 */         agg_agg_value_25_0 = agg_expr_0_0;
    /* 396 */         continue;
    /* 397 */       }
    /* 398 */
    /* 399 */       agg_caseWhenResultState_0 = (byte)(true ? 1 : 0);
    /* 400 */       agg_agg_value_25_0 = -1;
    /* 401 */
    /* 402 */     } while (false);
    /* 403 */     // TRUE if any condition is met and the result is null, or no 
any condition is met.
    /* 404 */     final boolean agg_isNull_22 = (agg_caseWhenResultState_0 != 
0);
    /* 405 */
    /* 406 */     if (!agg_isNull_22 && (agg_agg_isNull_20_0 ||
    /* 407 */         agg_value_23 > agg_agg_value_25_0)) {
    /* 408 */       agg_agg_isNull_20_0 = false;
    /* 409 */       agg_value_23 = agg_agg_value_25_0;
    /* 410 */     }
    /* 411 */     agg_agg_isNull_28_0 = true;
    /* 412 */     int agg_value_31 = -1;
    /* 413 */
    /* 414 */     boolean agg_isNull_29 = agg_unsafeRowAggBuffer_0.isNullAt(2);
    /* 415 */     int agg_value_32 = agg_isNull_29 ?
    /* 416 */     -1 : (agg_unsafeRowAggBuffer_0.getInt(2));
    /* 417 */
    /* 418 */     if (!agg_isNull_29 && (agg_agg_isNull_28_0 ||
    /* 419 */         agg_value_31 > agg_value_32)) {
    /* 420 */       agg_agg_isNull_28_0 = false;
    /* 421 */       agg_value_31 = agg_value_32;
    /* 422 */     }
    /* 423 */
    /* 424 */     if (!false && (agg_agg_isNull_28_0 ||
    /* 425 */         agg_value_31 > agg_expr_0_0)) {
    /* 426 */       agg_agg_isNull_28_0 = false;
    /* 427 */       agg_value_31 = agg_expr_0_0;
    /* 428 */     }
    /* 429 */     // update unsafe row buffer
    /* 430 */     agg_unsafeRowAggBuffer_0.setInt(0, agg_value_20);
    /* 431 */
    /* 432 */     if (!agg_agg_isNull_20_0) {
    /* 433 */       agg_unsafeRowAggBuffer_0.setInt(1, agg_value_23);
    /* 434 */     } else {
    /* 435 */       agg_unsafeRowAggBuffer_0.setNullAt(1);
    /* 436 */     }
    /* 437 */
    /* 438 */     agg_unsafeRowAggBuffer_0.setInt(2, agg_value_31);
    /* 439 */
    /* 440 */   }
    /* 441 */
    ...........
    /* 466 */     // output the result
    /* 467 */
    /* 468 */     while (agg_fastHashMapIter_0.next()) {
    /* 469 */       UnsafeRow agg_aggKey_0 = (UnsafeRow) 
agg_fastHashMapIter_0.getKey();
    /* 470 */       UnsafeRow agg_aggBuffer_0 = (UnsafeRow) 
agg_fastHashMapIter_0.getValue();
    /* 471 */       agg_doAggregateWithKeysOutput_0(agg_aggKey_0, 
agg_aggBuffer_0);
    /* 472 */
    /* 473 */       if (shouldStop()) return;
    /* 474 */     }
    /* 475 */     agg_fastHashMap_0.close();
    /* 476 */
    /* 477 */     while (agg_mapIter_0.next()) {
    /* 478 */       UnsafeRow agg_aggKey_0 = (UnsafeRow) agg_mapIter_0.getKey();
    /* 479 */       UnsafeRow agg_aggBuffer_0 = (UnsafeRow) 
agg_mapIter_0.getValue();
    /* 480 */       agg_doAggregateWithKeysOutput_0(agg_aggKey_0, 
agg_aggBuffer_0);
    /* 481 */
    /* 482 */       if (shouldStop()) return;
    /* 483 */     }
    /* 484 */
    /* 485 */     agg_mapIter_0.close();
    /* 486 */     if (agg_sorter_0 == null) {
    /* 487 */       agg_hashMap_0.free();
    /* 488 */     }
    /* 489 */   }
    /* 490 */
    /* 491 */ }
    
    whole codegen max code size:598
    
    ## How was this patch tested?
    
    Existing test cases.


You can merge this pull request into a Git repository by running:

    $ git pull https://github.com/heary-cao/spark fastHashMap

Alternatively you can review and apply these changes as the patch at:

    https://github.com/apache/spark/pull/21860.patch

To close this pull request, make a commit to your master/trunk branch
with (at least) the following in the commit message:

    This closes #21860
    
----
commit 137d8b9f279c32cde12f4d488c11be7e697edfab
Author: caoxuewen <cao.xuewen@...>
Date:   2018-07-24T10:45:55Z

    Merge the codegen of RegularHashMap and fastHashMap to reduce compiler 
maxCodesize when VectorizedHashMap is false

----


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to